Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
## lifebit-ai/cloudos-cli: changelog

## v2.79.0 (2026-01-26)

### Feat

- Adds `--api-docs` flag to `cloudos datasets ls` command to document API usage

## v2.78.0 (2026-01-13)

### Feat
Expand Down
48 changes: 37 additions & 11 deletions cloudos_cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3195,6 +3195,9 @@ def run_bash_array_job(ctx,
'Details contains "Type", "Owner", "Size", "Last Updated", ' +
'"Virtual Name", "Storage Path".'),
is_flag=True)
@click.option('--api-docs',
help='Display the CloudOS API endpoints called by this command with curl examples.',
is_flag=True)
@click.option('--output-format',
help=('The desired display for the output, either directly in standard output or saved as file. ' +
'Default=stdout.'),
Expand All @@ -3217,21 +3220,40 @@ def list_files(ctx,
profile,
path,
details,
api_docs,
output_format,
output_basename):
"""List contents of a path within a CloudOS workspace dataset."""
"""Lists the dataset information within the CloudOS platform.

Examples:
cloudos datasets ls --project-name my-project
cloudos datasets ls --project-name my-project --path Data
cloudos datasets ls --project-name my-project --path Data/results --details
cloudos datasets ls --project-name my-project --api-docs
"""
verify_ssl = ssl_selector(disable_ssl_verification, ssl_cert)

datasets = Datasets(
cloudos_url=cloudos_url,
apikey=apikey,
workspace_id=workspace_id,
project_name=project_name,
verify=verify_ssl,
cromwell_token=None
)
# Initialize API call tracker if --api-docs is enabled
from cloudos_cli.datasets.datasets import APICallTracker
tracker = None
if api_docs:
tracker = APICallTracker(
cloudos_url=cloudos_url,
workspace_id=workspace_id,
verify=verify_ssl
)

try:
datasets = Datasets(
cloudos_url=cloudos_url,
apikey=apikey,
workspace_id=workspace_id,
project_name=project_name,
verify=verify_ssl,
cromwell_token=None,
api_docs_tracker=tracker
)

result = datasets.list_folder_content(path)
contents = result.get("contents") or result.get("datasets", [])

Expand Down Expand Up @@ -3262,7 +3284,7 @@ def list_files(ctx,
type_ = "file (user uploaded)"
else:
type_ = "file (virtual copy)"

user = item.get("user", {})
if isinstance(user, dict):
name = user.get("name", "").strip()
Expand Down Expand Up @@ -3373,6 +3395,10 @@ def list_files(ctx,
else:
console.print(item['name'])

# Display API documentation if requested
if api_docs and tracker:
click.echo(tracker.get_documentation())

except Exception as e:
raise ValueError(f"Failed to list files for project '{project_name}'. {str(e)}")

Expand Down Expand Up @@ -4364,4 +4390,4 @@ def link_command(ctx,
else:
logger.error(e)
click.echo(click.style(f"Error: {e}", fg='red'), err=True)
sys.exit(1)
sys.exit(1)
2 changes: 1 addition & 1 deletion cloudos_cli/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.78.0'
__version__ = '2.79.0'
209 changes: 189 additions & 20 deletions cloudos_cli/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,105 @@
"""

from dataclasses import dataclass
from typing import Union
from typing import Union, Optional, List, Dict, Any
from cloudos_cli.clos import Cloudos
from cloudos_cli.utils.errors import BadRequestException
from cloudos_cli.utils.requests import retry_requests_get, retry_requests_put, retry_requests_post, retry_requests_delete
import json


class APICallTracker:
    """Collects the HTTP calls issued by a CLI command so they can be
    rendered afterwards as curl-based API documentation.

    Attributes
    ----------
    calls : list of dict
        Recorded calls, in the order they were tracked. Each entry holds
        'method', 'url', 'purpose' and 'extraction_hint'.
    cloudos_url : str
        Base URL of the CloudOS instance (kept for reference by callers).
    workspace_id : str
        Workspace (team) id, echoed in the Requirements section.
    verify : bool or str
        SSL verification setting: True (default verification), False
        (disabled, rendered as ``-k``) or a path to a CA bundle
        (rendered as ``--cacert``).
    project_id, project_name : str or None
        Resolved project details; filled in lazily by the Datasets class.
    """

    def __init__(self, cloudos_url: str, workspace_id: str, verify: Union[bool, str]):
        self.calls: List[Dict[str, Any]] = []
        self.cloudos_url = cloudos_url
        self.workspace_id = workspace_id
        self.verify = verify
        # Populated later, once the project name has been resolved to an id.
        self.project_id: Optional[str] = None
        self.project_name: Optional[str] = None

    def track(self, method: str, url: str, purpose: str, extraction_hint: str = ""):
        """Record a single API call for later documentation.

        Parameters
        ----------
        method : str
            HTTP method (GET, POST, PUT, DELETE)
        url : str
            Full URL of the API endpoint
        purpose : str
            Human-readable description of what this call does
        extraction_hint : str, optional
            Instructions on how to extract data from the response for next calls
        """
        entry = {
            'method': method,
            'url': url,
            'purpose': purpose,
            'extraction_hint': extraction_hint
        }
        self.calls.append(entry)

    def get_documentation(self) -> str:
        """Render every tracked call as curl-based API documentation.

        Returns
        -------
        str
            Formatted documentation with Requirements, Used Endpoints and
            How-to sections, or a short notice when nothing was tracked.
        """
        if not self.calls:
            return "No API calls were made."

        separator = "=" * 80
        # Header + requirements section.
        lines = ["\n" + separator,
                 "Platform API Instructions",
                 separator,
                 "\n### Requirements",
                 f"workspace-id = {self.workspace_id}"]
        if self.project_name:
            lines.append(f"project-name = {self.project_name}")
        lines.append("apikey = <YOUR_APIKEY>")

        # Surface the SSL configuration the user will need to replicate.
        if isinstance(self.verify, str):
            lines.append(f"ssl-cert = {self.verify}")
        elif self.verify is False:
            lines.append("ssl-verification = disabled")

        # One numbered curl command per tracked call.
        lines.append("\n### Used Endpoints")
        for index, entry in enumerate(self.calls, 1):
            lines.append(f"\n{index}. {entry['purpose']}")
            command = ["curl -X", entry['method']]
            if isinstance(self.verify, str):
                command.append(f"--cacert {self.verify}")
            elif self.verify is False:
                command.append("-k")
            command.append('-H "Content-type: application/json"')
            command.append('-H "apikey: <YOUR_APIKEY>"')
            command.append(f'"{entry["url"]}"')
            lines.append(" " + " ".join(command))

        # Walk-through of the calls that chain into each other.
        lines.append("\n### How to Use Them")
        lines.append("\nExecute the curl commands in sequence:")
        for index, entry in enumerate(self.calls, 1):
            if entry['extraction_hint']:
                lines.append(f"\n{index}. {entry['purpose']}")
                lines.append(f" {entry['extraction_hint']}")
        # Single self-contained call: give a simpler closing instruction.
        if len(self.calls) == 1 and not self.calls[0]['extraction_hint']:
            lines.append("\nExecute the curl command above. The response will contain the list of datasets.")
        lines.append("\n" + separator + "\n")

        return "\n".join(lines)


@dataclass
class Datasets(Cloudos):
"""Class for file explorer.
Expand All @@ -30,12 +122,23 @@ class Datasets(Cloudos):
the SSL certificate file.
project_id : string
The CloudOS project id for a given project name.
api_docs_tracker : APICallTracker, optional
Tracker for API calls when generating documentation.
"""
workspace_id: str
project_name: str
verify: Union[bool, str] = True
api_docs_tracker: Optional[APICallTracker] = None
project_id: str = None

def __post_init__(self):
    """Push the project details onto the attached API-call tracker, if any."""
    tracker = self.api_docs_tracker
    if not tracker:
        # No tracker configured: nothing to synchronise.
        return
    tracker.project_name = self.project_name
    if self.project_id:
        tracker.project_id = self.project_id

@property
def project_id(self) -> str:
    # Read-only view over the backing field set by the setter below.
    """Return the CloudOS project id resolved for this instance."""
    return self._project_id
Expand All @@ -51,6 +154,11 @@ def project_id(self, v) -> None:
else:
# Let the user define the value.
self._project_id = v
# Update tracker if present (use getattr to avoid AttributeError during initialization)
tracker = getattr(self, 'api_docs_tracker', None)
if tracker and v:
tracker.project_id = v
tracker.project_name = self.project_name

def fetch_project_id(self,
workspace_id,
Expand All @@ -74,7 +182,24 @@ def fetch_project_id(self,
project_id : string
The CloudOS project id for a given project name.
"""
return self.get_project_id_from_name(workspace_id, project_name, verify=verify)
# Use getattr to avoid AttributeError if called during initialization
tracker = getattr(self, 'api_docs_tracker', None)
if tracker:
url = f"{self.cloudos_url}/api/v2/projects?teamId={workspace_id}&search={project_name}"
tracker.track(
method="GET",
url=url,
purpose="Resolve project name to project ID",
extraction_hint=f"Extract the '_id' field from the project object where 'name' equals '{project_name}' in the response. Use: jq '.projects[] | select(.name==\"{project_name}\") | ._id'"
)

project_id = self.get_project_id_from_name(workspace_id, project_name, verify=verify)

if tracker:
tracker.project_id = project_id
tracker.project_name = project_name

return project_id

def list_project_content(self):
"""
Expand All @@ -91,14 +216,23 @@ def list_project_content(self):
project_id
The specific project id
"""
url = "{}/api/v2/datasets?projectId={}&teamId={}".format(self.cloudos_url,
self.project_id,
self.workspace_id)

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List all top-level datasets in the project '{self.project_name}' (project_id: '{self.project_id}'.",
extraction_hint="The response contains a 'datasets' array with all top-level datasets. Each dataset has '_id', 'name', and other metadata fields. To navigate deeper, extract the '_id' of the desired dataset."
)

headers = {
"Content-type": "application/json",
"apikey": self.apikey
}
r = retry_requests_get("{}/api/v2/datasets?projectId={}&teamId={}".format(self.cloudos_url,
self.project_id,
self.workspace_id),
headers=headers, verify=self.verify)
r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
raw = r.json()
Expand Down Expand Up @@ -142,10 +276,20 @@ def list_datasets_content(self, folder_name):
folder_id = folder['_id']
if not folder_id:
raise ValueError(f"Folder '{folder_name}' not found in project '{self.project_name}'.")
r = retry_requests_get("{}/api/v1/datasets/{}/items?teamId={}".format(self.cloudos_url,
folder_id,
self.workspace_id),
headers=headers, verify=self.verify)

url = "{}/api/v1/datasets/{}/items?teamId={}".format(self.cloudos_url,
folder_id,
self.workspace_id)

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List contents of dataset '{folder_name}' (dataset_id: {folder_id})",
extraction_hint="The response contains 'folders' and 'files' arrays. Folders have '_id', 'name', 'folderType' fields. Files have metadata like 'name', 'sizeInBytes', 'updatedAt'. For deeper navigation, use the folder's '_id' or inspect 'folderType' to determine the next API call."
)

r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
return r.json()
Expand All @@ -172,11 +316,20 @@ def list_s3_folder_content(self, s3_bucket_name, s3_relative_path):
"apikey": self.apikey
}

r = retry_requests_get("{}/api/v1/data-access/s3/bucket-contents?bucket={}&path={}&teamId={}".format(self.cloudos_url,
s3_bucket_name,
s3_relative_path,
self.workspace_id),
headers=headers, verify=self.verify)
url = "{}/api/v1/data-access/s3/bucket-contents?bucket={}&path={}&teamId={}".format(self.cloudos_url,
s3_bucket_name,
s3_relative_path,
self.workspace_id)

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List S3 folder contents (bucket: {s3_bucket_name}, path: {s3_relative_path})",
extraction_hint="The response contains a 'contents' array with objects having 'name', 'path', 'isDir', and 'size' fields. Items where 'isDir' is true are folders; use their 'path' for further navigation."
)

r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
raw = r.json()
Expand Down Expand Up @@ -216,10 +369,19 @@ def list_virtual_folder_content(self, folder_id):
"apikey": self.apikey
}

r = retry_requests_get("{}/api/v1/folders/virtual/{}/items?teamId={}".format(self.cloudos_url,
folder_id,
self.workspace_id),
headers=headers, verify=self.verify)
url = "{}/api/v1/folders/virtual/{}/items?teamId={}".format(self.cloudos_url,
folder_id,
self.workspace_id)

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List virtual folder contents (folder_id: {folder_id})",
extraction_hint="The response contains 'folders' and 'files' arrays similar to dataset contents. Use folder '_id' and 'folderType' to navigate deeper into the structure."
)

r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
return r.json()
Expand All @@ -237,6 +399,14 @@ def list_azure_container_content(self, container_name: str, storage_account_name
url += f"?containerName={container_name}&storageAccountName={storage_account_name}"
url += f"&path={path}&teamId={self.workspace_id}"

if self.api_docs_tracker:
self.api_docs_tracker.track(
method="GET",
url=url,
purpose=f"List Azure Blob container contents (container: {container_name}, account: {storage_account_name}, path: {path})",
extraction_hint="The response contains a 'contents' array with objects having 'name', 'path', 'isDir', 'size', and 'lastModified' fields. Items where 'isDir' is true are folders."
)

r = retry_requests_get(url, headers=headers, verify=self.verify)
if r.status_code >= 400:
raise BadRequestException(r)
Expand Down Expand Up @@ -362,7 +532,6 @@ def list_folder_content(self, path=None):
"files": [file_item],
"folders": []
}

# Also check in contents array (for different API response formats)
for item in folder_content.get("contents", []):
if item["name"] == job_name and not item.get("isDir", True):
Expand Down
Loading