Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cirro/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from cirro.cli.controller import run_ingest, run_download, run_configure, run_list_datasets
from cirro.cli.controller import run_create_pipeline_config, run_validate_folder
from cirro.cli.controller import run_list_projects, run_list_files

# Public API of the ``cirro.cli`` package.
# Every entry is terminated by a comma: a missing comma between two adjacent
# string literals makes Python silently concatenate them into a single
# (non-existent) export name.
__all__ = [
    'run_ingest',
    'run_download',
    'run_configure',
    'run_list_datasets',
    'run_create_pipeline_config',
    'run_validate_folder',
    'run_list_projects',
    'run_list_files',
]
29 changes: 28 additions & 1 deletion cirro/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from cirro.cli import run_create_pipeline_config, run_validate_folder
from cirro.cli import run_ingest, run_download, run_configure, run_list_datasets
from cirro.cli.controller import handle_error, run_upload_reference
from cirro.cli.controller import handle_error, run_upload_reference, run_list_projects, run_list_files
from cirro.cli.interactive.utils import InputError


Expand All @@ -25,6 +25,27 @@ def run():
pass # Print out help text, nothing to do


@run.command(help='List projects')
def list_projects():
    """CLI command that lists projects; takes no options and delegates to ``run_list_projects``."""
    run_list_projects()


@run.command(help='List files in a dataset', no_args_is_help=True)
@click.option('--project',
              help='Name or ID of the project')
@click.option('--dataset',
              help='Name or ID of the dataset')
@click.option('--file-limit',
              help='Maximum number of files to list',
              default=100000, show_default=True)
@click.option('-i', '--interactive',
              help='Gather arguments interactively',
              is_flag=True, default=False)
def list_files(**kwargs):
    """
    CLI command that lists the files in a dataset.

    The parsed options arrive as ``kwargs`` (``project``, ``dataset``,
    ``file_limit``, ``interactive``). They are checked with
    ``check_required_args`` and then handed unchanged to ``run_list_files``.
    """
    check_required_args(kwargs)
    run_list_files(kwargs, interactive=kwargs.get('interactive'))


@run.command(help='List datasets', no_args_is_help=True)
@click.option('--project',
help='Name or ID of the project')
Expand All @@ -47,6 +68,9 @@ def list_datasets(**kwargs):
multiple=True)
@click.option('--data-directory',
help='Directory to store the files')
@click.option('--file-limit',
help='Maximum number of files to enumerate from the dataset',
default=100000, show_default=True)
@click.option('-i', '--interactive',
help='Gather arguments interactively',
is_flag=True, default=False)
Expand Down Expand Up @@ -89,6 +113,9 @@ def upload(**kwargs):
help='Name or ID of the project')
@click.option('--data-directory',
help='Local directory you wish to validate')
@click.option('--file-limit',
help='Maximum number of files to enumerate from the dataset',
default=100000, show_default=True)
@click.option('-i', '--interactive',
help='Gather arguments interactively',
is_flag=True, default=False)
Expand Down
62 changes: 56 additions & 6 deletions cirro/cli/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from cirro.cli.interactive.utils import get_id_from_name, get_item_from_name_or_id, InputError, validate_files
from cirro.cli.interactive.validate_args import gather_validate_arguments, gather_validate_arguments_dataset
from cirro.cli.models import ListArguments, UploadArguments, DownloadArguments, CreatePipelineConfigArguments, \
UploadReferenceArguments, ValidateArguments
UploadReferenceArguments, ValidateArguments, ListFilesArguments
from cirro.config import UserConfig, save_user_config, load_user_config
from cirro.file_utils import get_files_in_directory
from cirro.models.process import PipelineDefinition, ConfigAppStatus, CONFIG_APP_URL
Expand Down Expand Up @@ -119,7 +119,10 @@ def run_validate_folder(input_params: ValidateArguments, interactive=False):
# Filter out datasets that are not complete
datasets = [d for d in datasets if d.status == Status.COMPLETED]
input_params = gather_validate_arguments_dataset(input_params, datasets)
files = cirro.datasets.get_assets_listing(input_params['project'], input_params['dataset']).files
files = cirro.datasets.get_assets_listing(
input_params['project'], input_params['dataset'],
file_limit=input_params['file_limit']
).files

if len(files) == 0:
raise InputError('There are no files in this dataset to validate against')
Expand All @@ -137,7 +140,8 @@ def run_validate_folder(input_params: ValidateArguments, interactive=False):
results = cirro.datasets.validate_folder(
project_id=project_id,
dataset_id=dataset_id,
local_folder=input_params['data_directory']
local_folder=input_params['data_directory'],
file_limit=input_params['file_limit']
)

for file_list, label, log_level in [
Expand Down Expand Up @@ -165,7 +169,10 @@ def run_download(input_params: DownloadArguments, interactive=False):
# Filter out datasets that are not complete
datasets = [d for d in datasets if d.status == Status.COMPLETED]
input_params = gather_download_arguments_dataset(input_params, datasets)
files = cirro.datasets.get_assets_listing(input_params['project'], input_params['dataset']).files
files = cirro.datasets.get_assets_listing(
input_params['project'], input_params['dataset'],
file_limit=input_params['file_limit']
).files

if len(files) == 0:
raise InputError('There are no files in this dataset to download')
Expand All @@ -180,7 +187,9 @@ def run_download(input_params: DownloadArguments, interactive=False):
dataset_id = get_id_from_name(datasets, input_params['dataset'])

if input_params['file']:
all_files = cirro.datasets.get_assets_listing(project_id, dataset_id).files
all_files = cirro.datasets.get_assets_listing(
project_id, dataset_id, file_limit=input_params['file_limit']
).files
files_to_download = []

for filepath in input_params['file']:
Expand All @@ -198,7 +207,48 @@ def run_download(input_params: DownloadArguments, interactive=False):
cirro.datasets.download_files(project_id=project_id,
dataset_id=dataset_id,
download_location=input_params['data_directory'],
files=files_to_download)
files=files_to_download,
file_limit=input_params['file_limit'])


def run_list_projects():
    """
    List all available projects.

    Prints a table with one row per project (columns ``id`` and ``name``)
    to standard output.
    """
    cirro = _init_cirro_client()
    projects = _get_projects(cirro)

    # pandas is imported locally, only by the commands that render a table
    import pandas as pd
    rows = [{'id': p.id, 'name': p.name} for p in projects]
    print(pd.DataFrame(rows).to_string(index=False))


def run_list_files(input_params: ListFilesArguments, interactive=False):
    """
    List files available in a dataset.

    Prints a table with one row per file (columns ``path`` and ``size``)
    to standard output, or logs a message when the dataset has no files.

    Args:
        input_params (ListFilesArguments): Must contain ``file_limit``. In
            non-interactive mode ``project`` and ``dataset`` are also required;
            in interactive mode they are optional and passed to the prompts.
        interactive (bool): When true, prompt for the project and dataset.
    """
    cirro = _init_cirro_client()
    projects = _get_projects(cirro)

    if interactive:
        # Prompt helpers are only needed on the interactive path
        from cirro.cli.interactive.common_args import ask_project, ask_dataset
        from cirro.services.service_helpers import list_all_datasets
        project_name = ask_project(projects, input_params.get('project'))
        project_id = get_id_from_name(projects, project_name)
        datasets = list_all_datasets(project_id=project_id, client=cirro)
        dataset_id = ask_dataset(datasets, input_params.get('dataset'), msg_action='list files for')
    else:
        project_id = get_id_from_name(projects, input_params['project'])
        datasets = cirro.datasets.list(project_id)
        dataset_id = get_id_from_name(datasets, input_params['dataset'])

    files = cirro.datasets.get_assets_listing(
        project_id, dataset_id, file_limit=input_params['file_limit']
    ).files

    if not files:
        logger.info("No files found in this dataset")
        return

    import pandas as pd
    rows = [{'path': f.normalized_path, 'size': f.size} for f in files]
    print(pd.DataFrame(rows).to_string(index=False))


def run_upload_reference(input_params: UploadReferenceArguments, interactive=False):
Expand Down
9 changes: 9 additions & 0 deletions cirro/cli/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class DownloadArguments(TypedDict):
data_directory: str
interactive: bool
file: Optional[list[str]]
file_limit: int


class UploadArguments(TypedDict):
Expand All @@ -25,6 +26,7 @@ class ValidateArguments(TypedDict):
project: str
data_directory: str
interactive: bool
file_limit: int


class ListArguments(TypedDict):
Expand All @@ -45,3 +47,10 @@ class UploadReferenceArguments(TypedDict):
project: str
reference_file: list[str]
interactive: bool


class ListFilesArguments(TypedDict):
    """Arguments accepted by the CLI command that lists the files in a dataset."""
    # Project to look in (the CLI help describes this as "Name or ID of the project")
    project: str
    # Dataset whose files are listed (CLI help: "Name or ID of the dataset")
    dataset: str
    # Whether to gather arguments interactively
    interactive: bool
    # Maximum number of files to list
    file_limit: int
13 changes: 11 additions & 2 deletions cirro/sdk/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,11 +276,20 @@ def get_file(self, relative_path: str) -> DataPortalFile:
msg = '\n'.join([f"No file found with path '{relative_path}'."])
raise DataPortalAssetNotFound(msg)

def list_files(self) -> DataPortalFiles:
def list_files(self, file_limit: int = 100000) -> DataPortalFiles:
"""
Return the list of files which make up the dataset.

Args:
file_limit (int): Maximum number of files to return (default 100,000)
"""
files = self._get_assets().files
assets = self._client.datasets.get_assets_listing(
project_id=self.project_id,
dataset_id=self.id,
file_limit=file_limit
)
files = assets.files

return DataPortalFiles(
[
DataPortalFile(file=file, client=self._client)
Expand Down
12 changes: 7 additions & 5 deletions cirro/services/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from cirro_api_client.v1.api.sharing import get_shared_datasets
from cirro_api_client.v1.models import ImportDataRequest, UploadDatasetRequest, UpdateDatasetRequest, Dataset, \
DatasetDetail, CreateResponse, UploadDatasetCreateResponse, FileEntry

from cirro.file_utils import is_hidden_file
from cirro.models.assets import DatasetAssets, Artifact
from cirro.models.dataset import DatasetValidationResponse
Expand Down Expand Up @@ -309,12 +308,13 @@ def validate_folder(
self,
project_id: str,
dataset_id: str,
local_folder: PathLike
local_folder: PathLike,
file_limit: int = 100000
) -> DatasetValidationResponse:
"""
Validates that the contents of a dataset match that of a local folder.
"""
ds_files = self.get_assets_listing(project_id, dataset_id).files
ds_files = self.get_assets_listing(project_id, dataset_id, file_limit=file_limit).files

local_folder = Path(local_folder)
if not local_folder.is_dir():
Expand Down Expand Up @@ -367,7 +367,8 @@ def download_files(
project_id: str,
dataset_id: str,
download_location: str,
files: Union[List[File], List[str]] = None
files: Union[List[File], List[str]] = None,
file_limit: int = 100000
) -> None:
"""
Downloads files from a dataset
Expand All @@ -380,9 +381,10 @@ def download_files(
dataset_id (str): ID of the Dataset
download_location (str): Local destination for downloaded files
files (typing.List[str]): Optional list of files to download
file_limit (int): Maximum number of files to get (default 100,000)
"""
if files is None:
files = self.get_assets_listing(project_id, dataset_id).files
files = self.get_assets_listing(project_id, dataset_id, file_limit=file_limit).files

if len(files) == 0:
return
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cirro"
version = "1.10.4"
version = "1.11.0"
description = "CLI tool and SDK for interacting with the Cirro platform"
authors = ["Cirro Bio <support@cirro.bio>"]
license = "MIT"
Expand Down
Loading