From 15d5cee00b9fe57cafd0c61135b8df217baa67ee Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 20 Apr 2026 23:33:09 +0000
Subject: [PATCH 1/3] Port bulkFetch and documentClassCounts from
 ndi.cloud.api.documents

Mirrors two new commands added to the MATLAB +ndi/+cloud/+api/+documents
namespace. MATLAB routes them through +implementation wrappers that
normalize output style; the Python port uses CloudClient for the same
role, so no +implementation mirror is needed.

INTERFACE UPDATE: Added bulkFetch and documentClassCounts entries to
src/ndi/cloud/api/ndi_matlab_python_bridge.yaml.

- bulkFetch: POST /datasets/{datasetId}/documents/bulk-fetch; mirrors
  MATLAB input validation (non-empty, <= 500 entries, 24-char hex IDs)
  and returns the 'documents' array.
- documentClassCounts: GET /datasets/{datasetId}/document-class-counts;
  returns the datasetId/totalDocuments/classCounts struct.
---
 src/ndi/cloud/api/documents.py                | 84 +++++++++++++++++
 .../cloud/api/ndi_matlab_python_bridge.yaml   | 47 ++++++++++
 tests/test_cloud_api_documents.py             | 91 +++++++++++++++++++
 3 files changed, 222 insertions(+)
 create mode 100644 tests/test_cloud_api_documents.py

diff --git a/src/ndi/cloud/api/documents.py b/src/ndi/cloud/api/documents.py
index 867ca4e..f1a4a8c 100644
--- a/src/ndi/cloud/api/documents.py
+++ b/src/ndi/cloud/api/documents.py
@@ -10,6 +10,7 @@
 
 from __future__ import annotations
 
+import re
 from typing import Annotated, Any
 
 from pydantic import SkipValidation, validate_call
@@ -17,6 +18,9 @@
 from ..client import APIResponse, CloudClient, _auto_client
 from ._validators import VALIDATE_CONFIG, CloudId, FilePath, PageNumber, PageSize, Scope
 
+_HEX24 = re.compile(r"^[0-9a-fA-F]{24}$")
+_BULK_FETCH_MAX = 500
+
 _Client = Annotated[CloudClient | None, SkipValidation()]
 
 
@@ -178,6 +182,86 @@ def countDocuments(dataset_id: CloudId, *, client: _Client = None) -> int:
     return ds.get("documentCount", 0)
 
 
+@_auto_client
+@validate_call(config=VALIDATE_CONFIG)
+def bulkFetch(
+    dataset_id: CloudId,
+    doc_ids: list[str],
+    *,
+    client: _Client = None,
+) -> list[dict[str, Any]]:
+    """POST /datasets/{datasetId}/documents/bulk-fetch
+
+    Synchronously fetch up to 500 documents (with full data) from a
+    dataset in a single call.  This is the fast synchronous companion
+    to the asynchronous :func:`getBulkDownloadURL` pipeline and is
+    intended for small sets (e.g. a subset of IDs returned by
+    :func:`ndiquery`).
+
+    Documents that do not exist, are soft-deleted, or do not belong to
+    the specified dataset are silently omitted from the response.  The
+    order of the returned documents is not guaranteed to match the
+    request order.
+
+    MATLAB equivalent: +cloud/+api/+documents/bulkFetch.m
+
+    Args:
+        dataset_id: The ID of the dataset containing the documents.
+        doc_ids: Document IDs to fetch.  Must be non-empty, at most 500
+            entries, and each entry must be a 24-character hex string.
+        client: Authenticated cloud client (auto-created if omitted).
+
+    Returns:
+        A list of document dicts, each with fields ``id``, ``ndiId``,
+        ``name``, ``className``, ``datasetId``, and ``data``.
+    """
+    if not doc_ids:
+        raise ValueError("doc_ids must be non-empty")
+    if len(doc_ids) > _BULK_FETCH_MAX:
+        raise ValueError(f"doc_ids must have at most {_BULK_FETCH_MAX} entries")
+    for did in doc_ids:
+        if not _HEX24.match(did):
+            raise ValueError(f"doc_ids entries must be 24-character hex strings: {did!r}")
+    result = client.post(
+        "/datasets/{datasetId}/documents/bulk-fetch",
+        json={"documentIds": list(doc_ids)},
+        datasetId=dataset_id,
+    )
+    return result.get("documents", []) if isinstance(result, dict) else list(result or [])
+
+
+@_auto_client
+@validate_call(config=VALIDATE_CONFIG)
+def documentClassCounts(
+    dataset_id: CloudId,
+    *,
+    client: _Client = None,
+) -> dict[str, Any]:
+    """GET /datasets/{datasetId}/document-class-counts
+
+    Retrieve a flat histogram of documents in a dataset grouped by leaf
+    ``data.document_class.class_name``.  No inheritance roll-up is
+    performed; for class-aware drill-downs use :func:`ndiquery` with
+    the ``isa`` operator.
+
+    MATLAB equivalent: +cloud/+api/+documents/documentClassCounts.m
+
+    Args:
+        dataset_id: The ID of the dataset to query.
+        client: Authenticated cloud client (auto-created if omitted).
+
+    Returns:
+        Dict with fields ``datasetId``, ``totalDocuments``, and
+        ``classCounts`` (a mapping of class name to integer count).
+        Documents with missing/empty ``class_name`` are bucketed under
+        ``'unknown'``.
+    """
+    return client.get(
+        "/datasets/{datasetId}/document-class-counts",
+        datasetId=dataset_id,
+    )
+
+
 @_auto_client
 @validate_call(config=VALIDATE_CONFIG)
 def bulkUpload(
diff --git a/src/ndi/cloud/api/ndi_matlab_python_bridge.yaml b/src/ndi/cloud/api/ndi_matlab_python_bridge.yaml
index ab0de1a..edbe64d 100644
--- a/src/ndi/cloud/api/ndi_matlab_python_bridge.yaml
+++ b/src/ndi/cloud/api/ndi_matlab_python_bridge.yaml
@@ -473,6 +473,53 @@ functions:
       Python convenience that combines getBulkUploadURL + putFiles.
       MATLAB does these as separate steps.
 
+  - name: bulkFetch
+    matlab_path: "+ndi/+cloud/+api/+documents/bulkFetch.m"
+    matlab_last_sync_hash: "bacdd0c3"
+    python_path: "ndi/cloud/api/documents.py"
+    input_arguments:
+      - name: dataset_id
+        type_matlab: "string"
+        type_python: "CloudId"
+      - name: doc_ids
+        type_matlab: "string array"
+        type_python: "list[str]"
+      - name: client
+        type_python: "_Client"
+        default: "None"
+    output_arguments:
+      - name: documents
+        type_python: "list[dict[str, Any]]"
+    decision_log: >
+      Synchronized with MATLAB main as of 2026-04-20. Synchronous bulk
+      fetch of up to 500 documents by ID via
+      POST /datasets/{datasetId}/documents/bulk-fetch. Mirrors MATLAB
+      input validation: non-empty, <= 500 entries, each a 24-character
+      hex string. MATLAB returns (b, answer, apiResponse, apiURL); Python
+      returns only the documents list (the 'answer'), consistent with
+      other api.* wrappers that delegate HTTP metadata to CloudClient.
+
+  - name: documentClassCounts
+    matlab_path: "+ndi/+cloud/+api/+documents/documentClassCounts.m"
+    matlab_last_sync_hash: "12bfe81"
+    python_path: "ndi/cloud/api/documents.py"
+    input_arguments:
+      - name: dataset_id
+        type_matlab: "string"
+        type_python: "CloudId"
+      - name: client
+        type_python: "_Client"
+        default: "None"
+    output_arguments:
+      - name: result
+        type_python: "dict[str, Any]"
+    decision_log: >
+      Synchronized with MATLAB main as of 2026-04-20.
+      GET /datasets/{datasetId}/document-class-counts. Returns a flat
+      histogram of leaf data.document_class.class_name with fields
+      datasetId, totalDocuments, and classCounts (a mapping of class name
+      to int). No inheritance roll-up is performed.
+
   - name: getBulkUploadURL
     matlab_path: "+ndi/+cloud/+api/+documents/getBulkUploadURL.m"
     matlab_last_sync_hash: "9b75c0fe"
diff --git a/tests/test_cloud_api_documents.py b/tests/test_cloud_api_documents.py
new file mode 100644
index 0000000..2c5d1c8
--- /dev/null
+++ b/tests/test_cloud_api_documents.py
@@ -0,0 +1,91 @@
+"""Unit tests for ndi.cloud.api.documents — no network required."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+
+
+def _make_client() -> MagicMock:
+    """Return a mock CloudClient."""
+    client = MagicMock()
+    client.config.org_id = "org-123"
+    client.config.api_url = "https://api.ndi-cloud.com/v1"
+    return client
+
+
+# --- 24-char hex helper for bulkFetch --------------------------------------
+_HEX24_A = "a" * 24
+_HEX24_B = "b" * 24
+
+
+class TestBulkFetch:
+    """bulkFetch validates inputs and POSTs to /documents/bulk-fetch."""
+
+    def test_returns_documents_list(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        client = _make_client()
+        client.post.return_value = {
+            "documents": [{"id": _HEX24_A, "name": "d1"}, {"id": _HEX24_B, "name": "d2"}]
+        }
+
+        docs = bulkFetch("ds-1", [_HEX24_A, _HEX24_B], client=client)
+
+        client.post.assert_called_once()
+        call = client.post.call_args
+        assert call.args[0] == "/datasets/{datasetId}/documents/bulk-fetch"
+        assert call.kwargs["datasetId"] == "ds-1"
+        assert call.kwargs["json"] == {"documentIds": [_HEX24_A, _HEX24_B]}
+        assert [d["name"] for d in docs] == ["d1", "d2"]
+
+    def test_empty_doc_ids_raises(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        with pytest.raises(ValueError, match="non-empty"):
+            bulkFetch("ds-1", [], client=_make_client())
+
+    def test_over_500_raises(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        ids = [_HEX24_A] * 501
+        with pytest.raises(ValueError, match="at most 500"):
+            bulkFetch("ds-1", ids, client=_make_client())
+
+    def test_non_hex_id_raises(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        with pytest.raises(ValueError, match="24-character hex"):
+            bulkFetch("ds-1", ["not-a-hex-id"], client=_make_client())
+
+    def test_missing_documents_field_returns_empty(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        client = _make_client()
+        client.post.return_value = {}
+        docs = bulkFetch("ds-1", [_HEX24_A], client=client)
+        assert docs == []
+
+
+class TestDocumentClassCounts:
+    """documentClassCounts GETs /document-class-counts and returns the struct."""
+
+    def test_returns_response_dict(self):
+        from ndi.cloud.api.documents import documentClassCounts
+
+        client = _make_client()
+        client.get.return_value = {
+            "datasetId": "ds-1",
+            "totalDocuments": 3,
+            "classCounts": {"ndi_document_probe": 2, "unknown": 1},
+        }
+
+        result = documentClassCounts("ds-1", client=client)
+
+        client.get.assert_called_once()
+        call = client.get.call_args
+        assert call.args[0] == "/datasets/{datasetId}/document-class-counts"
+        assert call.kwargs["datasetId"] == "ds-1"
+        assert result["totalDocuments"] == 3
+        assert result["classCounts"]["ndi_document_probe"] == 2

From ee3dfa96f5668d9c53c19a9849f98a32c50b033b Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 20 Apr 2026 23:53:58 +0000
Subject: [PATCH 2/3] Use isa operator in cloud ndiquery live tests

The cloud search API no longer exposes document_class.class_name as a
directly searchable field path. Class filtering now has to go through
the 'isa' operator, which also rolls up subclasses. This was causing
test_ndiqueryAll_paginates to return zero documents against the live
server.

Only the two cloud ndiquery tests are affected. Inline document bodies
(e.g. {"document_class": {"class_name": "..."}}) and local
session.database_search calls continue to use the field directly since
they are not cloud search structures.
---
 tests/test_cloud_live.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/tests/test_cloud_live.py b/tests/test_cloud_live.py
index 40ddd69..33e8d0d 100644
--- a/tests/test_cloud_live.py
+++ b/tests/test_cloud_live.py
@@ -794,13 +794,9 @@ def test_ndiquery_public(self, client):
         """ndiquery should return documents matching a search."""
         from ndi.cloud.api.documents import ndiquery
 
-        search = [
-            {
-                "field": "document_class.class_name",
-                "operation": "exact_string",
-                "param1": "session",
-            }
-        ]
+        # Class filtering on the cloud must go through the 'isa' operator;
+        # the document_class.class_name field path is no longer searchable.
+        search = [{"field": "", "operation": "isa", "param1": "session"}]
         result = _retry_on_server_error(
             lambda: ndiquery("public", search, page=1, page_size=5, client=client)
         )
@@ -828,13 +824,9 @@ def test_ndiqueryAll_paginates(self, client):
         """ndiqueryAll should auto-paginate results."""
         from ndi.cloud.api.documents import ndiqueryAll
 
-        search = [
-            {
-                "field": "document_class.class_name",
-                "operation": "exact_string",
-                "param1": "session",
-            }
-        ]
+        # Class filtering on the cloud must go through the 'isa' operator;
+        # the document_class.class_name field path is no longer searchable.
+        search = [{"field": "", "operation": "isa", "param1": "session"}]
         result = _retry_on_server_error(
             lambda: ndiqueryAll("public", search, page_size=3, client=client)
         )

From 43f156b9dc6f515c0ddcdad88f13af42bc6db659 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Tue, 21 Apr 2026 00:00:28 +0000
Subject: [PATCH 3/3] Use ndi_query.all() in gui.py document list refresh

Replaces the regex-on-document_class.class_name idiom with the
semantic equivalent ndi_query.all(), which is a static factory for
isa('base'). Matches the NDI-matlab ndi.query.all() convention and
avoids relying on the soon-to-be-removed document_class field path.
---
 src/ndi/gui/gui.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/ndi/gui/gui.py b/src/ndi/gui/gui.py
index 99b241b..1cd8456 100644
--- a/src/ndi/gui/gui.py
+++ b/src/ndi/gui/gui.py
@@ -139,9 +139,7 @@ def _update_db_list(self) -> None:
             try:
                 from ndi.query import ndi_query
 
-                doc_list = self._session.database_search(
-                    ndi_query("document_class.class_name", "regex", "(.*)", "")
-                )
+                doc_list = self._session.database_search(ndi_query.all())
             except Exception:
                 doc_list = []