Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import urllib
from zipfile import ZipFile
from pathlib import Path

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
Expand Down Expand Up @@ -103,28 +104,22 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
print("Downloading data.")
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
# Local data
repo_root = Path(__file__).resolve().parents[6]
local_data_path = (
repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import urllib
from zipfile import ZipFile
from pathlib import Path

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
Expand Down Expand Up @@ -106,28 +107,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
print("Downloading data.")
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
# Local data
repo_root = Path(__file__).resolve().parents[6]
local_data_path = (
repo_root
/ "sample-data"
/ "image-classification"
/ "multilabelFridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import subprocess
import sys
import urllib
Expand Down Expand Up @@ -156,27 +157,25 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create data folder if it doesnt exist.
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip"
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root
/ "sample-data"
/ "image-instance-segmentation"
/ "odFridgeObjectsMask.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the data zip file path
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download the dataset
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import xml.etree.ElementTree as ET

from zipfile import ZipFile
from pathlib import Path

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
Expand Down Expand Up @@ -159,27 +160,23 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# create data folder if it doesnt exist.
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip"
)
print(f"Using local data from {local_data_path}")

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the data zip file path
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download the dataset
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import urllib.request
import pandas as pd
from zipfile import ZipFile
from pathlib import Path


def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> None:
Expand All @@ -20,33 +21,33 @@ def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> N
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
repo_root = Path(__file__).resolve().parents[5]
if is_multilabel_dataset == 0:
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
local_data_path = (
repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
)
else:
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
print(f"Downloading data from {download_url}")
local_data_path = (
repo_root
/ "sample-data"
/ "image-classification"
/ "multilabelFridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import urllib.request
import pandas as pd
from zipfile import ZipFile
from pathlib import Path
import random
import string

Expand All @@ -19,31 +20,23 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import shutil
import urllib.request
import pandas as pd
Expand All @@ -17,32 +18,25 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data

download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip"
print(f"Downloading data from {download_url}")
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root
/ "sample-data"
/ "image-instance-segmentation"
/ "odFridgeObjectsMask.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)
return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import urllib.request
import pandas as pd
from zipfile import ZipFile
from pathlib import Path


def download_and_unzip(dataset_parent_dir: str) -> None:
Expand All @@ -17,32 +18,22 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data

download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
print(f"Downloading data from {download_url}")
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = (
repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip"
)

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
# Extract files
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)
return dataset_dir


Expand Down
Loading
Loading