Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
## lifebit-ai/cloudos-cli: changelog

## v2.79.0 (2026-01-26)

### Feat

- Adds `--api-docs` flag to `cloudos datasets ls` command to document API usage

## v2.78.0 (2026-01-13)

### Feat
Expand Down
48 changes: 37 additions & 11 deletions cloudos_cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3195,6 +3195,9 @@ def run_bash_array_job(ctx,
'Details contains "Type", "Owner", "Size", "Last Updated", ' +
'"Virtual Name", "Storage Path".'),
is_flag=True)
@click.option('--api-docs',
help='Display the CloudOS API endpoints called by this command with curl examples.',
is_flag=True)
@click.option('--output-format',
help=('The desired display for the output, either directly in standard output or saved as file. ' +
'Default=stdout.'),
Expand All @@ -3217,21 +3220,40 @@ def list_files(ctx,
profile,
path,
details,
api_docs,
output_format,
output_basename):
"""List contents of a path within a CloudOS workspace dataset."""
"""Lists the dataset information within the CloudOS platform.

Examples:
cloudos datasets ls --project-name my-project
cloudos datasets ls --project-name my-project --path Data
cloudos datasets ls --project-name my-project --path Data/results --details
cloudos datasets ls --project-name my-project --api-docs
"""
verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert)

datasets = Datasets(
cloudos_url=cloudos_url,
apikey=apikey,
workspace_id=workspace_id,
project_name=project_name,
verify=verify_ssl,
cromwell_token=None
)
# Initialize API call tracker if --api-docs is enabled
from cloudos_cli.datasets.datasets import APICallTracker
tracker = None
if api_docs:
tracker = APICallTracker(
cloudos_url=cloudos_url,
workspace_id=workspace_id,
verify=verify_ssl
)

try:
datasets = Datasets(
cloudos_url=cloudos_url,
apikey=apikey,
workspace_id=workspace_id,
project_name=project_name,
verify=verify_ssl,
cromwell_token=None,
api_docs_tracker=tracker
)

result = datasets.list_folder_content(path)
contents = result.get("contents") or result.get("datasets", [])

Expand Down Expand Up @@ -3262,7 +3284,7 @@ def list_files(ctx,
type_ = "file (user uploaded)"
else:
type_ = "file (virtual copy)"

user = item.get("user", {})
if isinstance(user, dict):
name = user.get("name", "").strip()
Expand Down Expand Up @@ -3373,6 +3395,10 @@ def list_files(ctx,
else:
console.print(item['name'])

# Display API documentation if requested
if api_docs and tracker:
click.echo(tracker.get_documentation())

except Exception as e:
raise ValueError(f"Failed to list files for project '{project_name}'. {str(e)}")

Expand Down Expand Up @@ -4364,4 +4390,4 @@ def link_command(ctx,
else:
logger.error(e)
click.echo(click.style(f"Error: {e}", fg='red'), err=True)
sys.exit(1)
sys.exit(1)
2 changes: 1 addition & 1 deletion cloudos_cli/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.78.0'
__version__ = '2.79.0'
209 changes: 189 additions & 20 deletions cloudos_cli/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,105 @@
"""

from dataclasses import dataclass
from typing import Union
from typing import Union, Optional, List, Dict, Any
from cloudos_cli.clos import Cloudos
from cloudos_cli.utils.errors import BadRequestException
from cloudos_cli.utils.requests import retry_requests_get, retry_requests_put, retry_requests_post, retry_requests_delete
import json


class APICallTracker:
    """Collects the HTTP calls issued by a CLI command so they can be
    rendered afterwards as curl-based API documentation.

    Attributes
    ----------
    calls : list of dict
        Recorded calls, in the order they were tracked. Each entry holds
        'method', 'url', 'purpose' and 'extraction_hint'.
    cloudos_url : str
        Base URL of the CloudOS instance (kept for reference by callers).
    workspace_id : str
        Workspace (team) id, echoed in the Requirements section.
    verify : bool or str
        SSL verification setting: True (default verification), False
        (disabled, rendered as ``-k``) or a path to a CA bundle
        (rendered as ``--cacert``).
    project_id, project_name : str or None
        Resolved project details; filled in lazily by the Datasets class.
    """

    def __init__(self, cloudos_url: str, workspace_id: str, verify: Union[bool, str]):
        self.calls: List[Dict[str, Any]] = []
        self.cloudos_url = cloudos_url
        self.workspace_id = workspace_id
        self.verify = verify
        # Populated later, once the project name has been resolved to an id.
        self.project_id: Optional[str] = None
        self.project_name: Optional[str] = None

    def track(self, method: str, url: str, purpose: str, extraction_hint: str = ""):
        """Record a single API call for later documentation.

        Parameters
        ----------
        method : str
            HTTP method (GET, POST, PUT, DELETE)
        url : str
            Full URL of the API endpoint
        purpose : str
            Human-readable description of what this call does
        extraction_hint : str, optional
            Instructions on how to extract data from the response for next calls
        """
        entry = {
            'method': method,
            'url': url,
            'purpose': purpose,
            'extraction_hint': extraction_hint
        }
        self.calls.append(entry)

    def get_documentation(self) -> str:
        """Render every tracked call as curl-based API documentation.

        Returns
        -------
        str
            Formatted documentation with Requirements, Used Endpoints and
            How-to sections, or a short notice when nothing was tracked.
        """
        if not self.calls:
            return "No API calls were made."

        separator = "=" * 80
        # Header + requirements section.
        lines = ["\n" + separator,
                 "Platform API Instructions",
                 separator,
                 "\n### Requirements",
                 f"workspace-id = {self.workspace_id}"]
        if self.project_name:
            lines.append(f"project-name = {self.project_name}")
        lines.append("apikey = <YOUR_APIKEY>")

        # Surface the SSL configuration the user will need to replicate.
        if isinstance(self.verify, str):
            lines.append(f"ssl-cert = {self.verify}")
        elif self.verify is False:
            lines.append("ssl-verification = disabled")

        # One numbered curl command per tracked call.
        lines.append("\n### Used Endpoints")
        for index, entry in enumerate(self.calls, 1):
            lines.append(f"\n{index}. {entry['purpose']}")
            command = ["curl -X", entry['method']]
            if isinstance(self.verify, str):
                command.append(f"--cacert {self.verify}")
            elif self.verify is False:
                command.append("-k")
            command.append('-H "Content-type: application/json"')
            command.append('-H "apikey: <YOUR_APIKEY>"')
            command.append(f'"{entry["url"]}"')
            lines.append(" " + " ".join(command))

        # Walk-through of the calls that chain into each other.
        lines.append("\n### How to Use Them")
        lines.append("\nExecute the curl commands in sequence:")
        for index, entry in enumerate(self.calls, 1):
            if entry['extraction_hint']:
                lines.append(f"\n{index}. {entry['purpose']}")
                lines.append(f" {entry['extraction_hint']}")
        # Single self-contained call: give a simpler closing instruction.
        if len(self.calls) == 1 and not self.calls[0]['extraction_hint']:
            lines.append("\nExecute the curl command above. The response will contain the list of datasets.")
        lines.append("\n" + separator + "\n")

        return "\n".join(lines)


@dataclass
class Datasets(Cloudos):
"""Class for file explorer.
Expand All @@ -30,12 +122,23 @@ class Datasets(Cloudos):
the SSL certificate file.
project_id : string
The CloudOS project id for a given project name.
api_docs_tracker : APICallTracker, optional
Tracker for API calls when generating documentation.
"""
workspace_id: str
project_name: str
verify: Union[bool, str] = True
api_docs_tracker: Optional[APICallTracker] = None
project_id: str = None

def __post_init__(self):
    """Push the project details onto the attached API-call tracker, if any."""
    tracker = self.api_docs_tracker
    if not tracker:
        # No tracker configured: nothing to synchronise.
        return
    tracker.project_name = self.project_name
    if self.project_id:
        tracker.project_id = self.project_id

@property
def project_id(self) -> str:
    # Read-only view over the backing field set by the setter below.
    """Return the CloudOS project id resolved for this instance."""
    return self._project_id
Expand All @@ -51,6 +154,11 @@ def project_id(self, v) -> None:
else:
# Let the user define the value.
self._project_id = v
# Update tracker if present (use getattr to avoid AttributeError during initialization)
tracker = getattr(self, 'api_docs_tracker', None)
if tracker and v:
tracker.project_id = v
tracker.project_name = self.project_name

def fetch_project_id(self,
workspace_id,
Expand All @@ -74,7 +182,24 @@ def fetch_project_id(self,
project_id : string
The CloudOS project id for a given project name.
"""
return self.get_project_id_from_name(workspace_id, project_name, verify=verify)
# Use getattr to avoid AttributeError if called during initialization
tracker = getattr(self, 'api_docs_tracker', None)
if tracker:
url = f"{self.cloudos_url}/api/v2/projects?teamId={workspace_id}&search={project_name}"
tracker.track(
method="GET",
url=url,
purpose="Resolve project name to project ID",
extraction_hint=f"Extract the '_id' field from the project object where 'name' equals '{project_name}' in the response. Use: jq '.projects[] | select(.name==\"{project_name}\") | ._id'"
)

project_id = self.get_project_id_from_name(workspace_id, project_name, verify=verify)

if tracker:
tracker.project_id = project_id
tracker.project_name = project_name

return project_id

def list_project_content(self):
"""
Expand All @@ -91,14 +216,23 @@ def list_project_content(self):
project_id
The specific project id
"""
url = "{}/api/v2/datasets?projectId={}&teamId={}".format(self.cloudos_url,
self.project_id,
self.workspace_id)

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List all top-level datasets in the project '{self.project_name}' (project_id: '{self.project_id}'.",
extraction_hint="The response contains a 'datasets' array with all top-level datasets. Each dataset has '_id', 'name', and other metadata fields. To navigate deeper, extract the '_id' of the desired dataset."
)

headers = {
"Content-type": "application/json",
"apikey": self.apikey
}
r = retry_requests_get("{}/api/v2/datasets?projectId={}&teamId={}".format(self.cloudos_url,
self.project_id,
self.workspace_id),
headers=headers, verify=self.verify)
r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
raw = r.json()
Expand Down Expand Up @@ -142,10 +276,20 @@ def list_datasets_content(self, folder_name):
folder_id = folder['_id']
if not folder_id:
raise ValueError(f"Folder '{folder_name}' not found in project '{self.project_name}'.")
r = retry_requests_get("{}/api/v1/datasets/{}/items?teamId={}".format(self.cloudos_url,
folder_id,
self.workspace_id),
headers=headers, verify=self.verify)

url = "{}/api/v1/datasets/{}/items?teamId={}".format(self.cloudos_url,
folder_id,
self.workspace_id)

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List contents of dataset '{folder_name}' (dataset_id: {folder_id})",
extraction_hint="The response contains 'folders' and 'files' arrays. Folders have '_id', 'name', 'folderType' fields. Files have metadata like 'name', 'sizeInBytes', 'updatedAt'. For deeper navigation, use the folder's '_id' or inspect 'folderType' to determine the next API call."
)

r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
return r.json()
Expand All @@ -172,11 +316,20 @@ def list_s3_folder_content(self, s3_bucket_name, s3_relative_path):
"apikey": self.apikey
}

r = retry_requests_get("{}/api/v1/data-access/s3/bucket-contents?bucket={}&path={}&teamId={}".format(self.cloudos_url,
s3_bucket_name,
s3_relative_path,
self.workspace_id),
headers=headers, verify=self.verify)
url = "{}/api/v1/data-access/s3/bucket-contents?bucket={}&path={}&teamId={}".format(self.cloudos_url,
s3_bucket_name,
s3_relative_path,
self.workspace_id)

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List S3 folder contents (bucket: {s3_bucket_name}, path: {s3_relative_path})",
extraction_hint="The response contains a 'contents' array with objects having 'name', 'path', 'isDir', and 'size' fields. Items where 'isDir' is true are folders; use their 'path' for further navigation."
)

r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
raw = r.json()
Expand Down Expand Up @@ -216,10 +369,19 @@ def list_virtual_folder_content(self, folder_id):
"apikey": self.apikey
}

r = retry_requests_get("{}/api/v1/folders/virtual/{}/items?teamId={}".format(self.cloudos_url,
folder_id,
self.workspace_id),
headers=headers, verify=self.verify)
url = "{}/api/v1/folders/virtual/{}/items?teamId={}".format(self.cloudos_url,
folder_id,
self.workspace_id)

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List virtual folder contents (folder_id: {folder_id})",
extraction_hint="The response contains 'folders' and 'files' arrays similar to dataset contents. Use folder '_id' and 'folderType' to navigate deeper into the structure."
)

r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
return r.json()
Expand All @@ -237,6 +399,14 @@ def list_azure_container_content(self, container_name: str, storage_account_name
url += f"?containerName={container_name}&storageAccountName={storage_account_name}"
url += f"&path={path}&teamId={self.workspace_id}"

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List Azure Blob container contents (container: {container_name}, account: {storage_account_name}, path: {path})",
extraction_hint="The response contains a 'contents' array with objects having 'name', 'path', 'isDir', 'size', and 'lastModified' fields. Items where 'isDir' is true are folders."
)

r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
Expand Down Expand Up @@ -362,7 +532,6 @@ def list_folder_content(self, path=None):
"files": [file_item],
"folders": []
}

# Also check in contents array (for different API response formats)
for item in folder_content.get("contents", []):
if item["name"] == job_name and not item.get("isDir", True):
Expand Down
Loading