Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import urllib
from zipfile import ZipFile
from pathlib import Path

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
Expand Down Expand Up @@ -103,28 +104,22 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
print("Downloading data.")
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
# Local data
repo_root = Path(__file__).resolve().parents[6]
local_data_path = (
repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import urllib
from zipfile import ZipFile
from pathlib import Path

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
Expand Down Expand Up @@ -106,28 +107,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
print("Downloading data.")
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
# Local data
repo_root = Path(__file__).resolve().parents[6]
local_data_path = (
repo_root
/ "sample-data"
/ "image-classification"
/ "multilabelFridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import subprocess
import sys
import urllib
Expand Down Expand Up @@ -156,27 +157,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create data folder if it doesnt exist.
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip"
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root
/ "sample-data"
/ "image-instance-segmentation"
/ "odFridgeObjectsMask.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the data zip file path
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download the dataset
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import xml.etree.ElementTree as ET

from zipfile import ZipFile
from pathlib import Path

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
Expand Down Expand Up @@ -159,27 +160,23 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# create data folder if it doesnt exist.
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip"
)
print(f"Using local data from {local_data_path}")

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the data zip file path
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download the dataset
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import urllib.request
import pandas as pd
from zipfile import ZipFile
from pathlib import Path


def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> None:
Expand All @@ -20,33 +21,33 @@ def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> N
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
repo_root = Path(__file__).resolve().parents[5]
if is_multilabel_dataset == 0:
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
local_data_path = (
repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
)
else:
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
print(f"Downloading data from {download_url}")
local_data_path = (
repo_root
/ "sample-data"
/ "image-classification"
/ "multilabelFridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import urllib.request
import pandas as pd
from zipfile import ZipFile
from pathlib import Path
import random
import string

Expand All @@ -19,31 +20,23 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import shutil
import urllib.request
import pandas as pd
Expand All @@ -17,32 +18,25 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data

download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip"
print(f"Downloading data from {download_url}")
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root
/ "sample-data"
/ "image-instance-segmentation"
/ "odFridgeObjectsMask.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)
return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import urllib.request
import pandas as pd
from zipfile import ZipFile
from pathlib import Path


def download_and_unzip(dataset_parent_dir: str) -> None:
Expand All @@ -17,32 +18,22 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data

download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
print(f"Downloading data from {download_url}")
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)
return dataset_dir


Expand Down
Loading
Loading