From 15d5cee00b9fe57cafd0c61135b8df217baa67ee Mon Sep 17 00:00:00 2001
From: Claude
Date: Mon, 20 Apr 2026 23:33:09 +0000
Subject: [PATCH 1/3] Port bulkFetch and documentClassCounts from ndi.cloud.api.documents

Mirrors two new commands added to the MATLAB +ndi/+cloud/+api/+documents
namespace. MATLAB routes them through +implementation wrappers that
normalize output style; the Python port uses CloudClient for the same
role, so no +implementation mirror is needed.

INTERFACE UPDATE: Added bulkFetch and documentClassCounts entries to
src/ndi/cloud/api/ndi_matlab_python_bridge.yaml.

- bulkFetch: POST /datasets/{datasetId}/documents/bulk-fetch; mirrors
  MATLAB input validation (non-empty, <= 500 entries, 24-char hex IDs)
  and returns the 'documents' array.
- documentClassCounts: GET /datasets/{datasetId}/document-class-counts;
  returns the datasetId/totalDocuments/classCounts struct.
---
 src/ndi/cloud/api/documents.py                | 89 ++++++++++++++++++
 .../cloud/api/ndi_matlab_python_bridge.yaml   | 47 ++++++++++
 tests/test_cloud_api_documents.py             | 91 +++++++++++++++++++
 3 files changed, 227 insertions(+)
 create mode 100644 tests/test_cloud_api_documents.py

diff --git a/src/ndi/cloud/api/documents.py b/src/ndi/cloud/api/documents.py
index 867ca4e..f1a4a8c 100644
--- a/src/ndi/cloud/api/documents.py
+++ b/src/ndi/cloud/api/documents.py
@@ -10,6 +10,7 @@
 
 from __future__ import annotations
 
+import re
 from typing import Annotated, Any
 
 from pydantic import SkipValidation, validate_call
@@ -17,6 +18,9 @@
 from ..client import APIResponse, CloudClient, _auto_client
 from ._validators import VALIDATE_CONFIG, CloudId, FilePath, PageNumber, PageSize, Scope
 
+_HEX24 = re.compile(r"^[0-9a-fA-F]{24}$")
+_BULK_FETCH_MAX = 500
+
 _Client = Annotated[CloudClient | None, SkipValidation()]
 
 
@@ -178,6 +182,91 @@ def countDocuments(dataset_id: CloudId, *, client: _Client = None) -> int:
     return ds.get("documentCount", 0)
 
 
+@_auto_client
+@validate_call(config=VALIDATE_CONFIG)
+def bulkFetch(
+    dataset_id: CloudId,
+    doc_ids: list[str],
+    *,
+    client: _Client = None,
+) -> list[dict[str, Any]]:
+    """POST /datasets/{datasetId}/documents/bulk-fetch
+
+    Synchronously fetch up to 500 documents (with full data) from a
+    dataset in a single call. This is the fast synchronous companion
+    to the asynchronous :func:`getBulkDownloadURL` pipeline and is
+    intended for small sets (e.g. a subset of IDs returned by
+    :func:`ndiquery`).
+
+    Documents that do not exist, are soft-deleted, or do not belong to
+    the specified dataset are silently omitted from the response. The
+    order of the returned documents is not guaranteed to match the
+    request order.
+
+    MATLAB equivalent: +cloud/+api/+documents/bulkFetch.m
+
+    Args:
+        dataset_id: The ID of the dataset containing the documents.
+        doc_ids: Document IDs to fetch. Must be non-empty, at most 500
+            entries, and each entry must be a 24-character hex string.
+        client: Authenticated cloud client (auto-created if omitted).
+
+    Returns:
+        A list of document dicts, each with fields ``id``, ``ndiId``,
+        ``name``, ``className``, ``datasetId``, and ``data``.
+
+    Raises:
+        ValueError: If ``doc_ids`` is empty, has more than 500 entries, or
+            contains an entry that is not a 24-character hex string.
+    """
+    if not doc_ids:
+        raise ValueError("doc_ids must be non-empty")
+    if len(doc_ids) > _BULK_FETCH_MAX:
+        raise ValueError(f"doc_ids must have at most {_BULK_FETCH_MAX} entries")
+    for did in doc_ids:
+        # fullmatch: with match(), the pattern's "$" also accepts a trailing "\n".
+        if not _HEX24.fullmatch(did):
+            raise ValueError(f"doc_ids entries must be 24-character hex strings: {did!r}")
+    result = client.post(
+        "/datasets/{datasetId}/documents/bulk-fetch",
+        json={"documentIds": list(doc_ids)},
+        datasetId=dataset_id,
+    )
+    return result.get("documents", []) if isinstance(result, dict) else list(result or [])
+
+
+@_auto_client
+@validate_call(config=VALIDATE_CONFIG)
+def documentClassCounts(
+    dataset_id: CloudId,
+    *,
+    client: _Client = None,
+) -> dict[str, Any]:
+    """GET /datasets/{datasetId}/document-class-counts
+
+    Retrieve a flat histogram of documents in a dataset grouped by leaf
+    ``data.document_class.class_name``. No inheritance roll-up is
+    performed; for class-aware drill-downs use :func:`ndiquery` with
+    the ``isa`` operator.
+
+    MATLAB equivalent: +cloud/+api/+documents/documentClassCounts.m
+
+    Args:
+        dataset_id: The ID of the dataset to query.
+        client: Authenticated cloud client (auto-created if omitted).
+
+    Returns:
+        Dict with fields ``datasetId``, ``totalDocuments``, and
+        ``classCounts`` (a mapping of class name to integer count).
+        Documents with missing/empty ``class_name`` are bucketed under
+        ``'unknown'``.
+    """
+    return client.get(
+        "/datasets/{datasetId}/document-class-counts",
+        datasetId=dataset_id,
+    )
+
+
 @_auto_client
 @validate_call(config=VALIDATE_CONFIG)
 def bulkUpload(
diff --git a/src/ndi/cloud/api/ndi_matlab_python_bridge.yaml b/src/ndi/cloud/api/ndi_matlab_python_bridge.yaml
index ab0de1a..edbe64d 100644
--- a/src/ndi/cloud/api/ndi_matlab_python_bridge.yaml
+++ b/src/ndi/cloud/api/ndi_matlab_python_bridge.yaml
@@ -473,6 +473,53 @@ functions:
       Python convenience that combines getBulkUploadURL + putFiles.
       MATLAB does these as separate steps.
 
+  - name: bulkFetch
+    matlab_path: "+ndi/+cloud/+api/+documents/bulkFetch.m"
+    matlab_last_sync_hash: "bacdd0c3"
+    python_path: "ndi/cloud/api/documents.py"
+    input_arguments:
+      - name: dataset_id
+        type_matlab: "string"
+        type_python: "CloudId"
+      - name: doc_ids
+        type_matlab: "string array"
+        type_python: "list[str]"
+      - name: client
+        type_python: "_Client"
+        default: "None"
+    output_arguments:
+      - name: documents
+        type_python: "list[dict[str, Any]]"
+    decision_log: >
+      Synchronized with MATLAB main as of 2026-04-20. Synchronous bulk
+      fetch of up to 500 documents by ID via POST /datasets/{datasetId}
+      /documents/bulk-fetch. Mirrors MATLAB input validation: non-empty,
+      <= 500 entries, each a 24-character hex string. MATLAB returns
+      (b, answer, apiResponse, apiURL); Python returns only the documents
+      list (the 'answer'), consistent with other api.* wrappers that
+      delegate HTTP metadata to CloudClient.
+
+  - name: documentClassCounts
+    matlab_path: "+ndi/+cloud/+api/+documents/documentClassCounts.m"
+    matlab_last_sync_hash: "12bfe81"  # NOTE(review): 7 chars; sibling entries use 8-char hashes - confirm
+    python_path: "ndi/cloud/api/documents.py"
+    input_arguments:
+      - name: dataset_id
+        type_matlab: "string"
+        type_python: "CloudId"
+      - name: client
+        type_python: "_Client"
+        default: "None"
+    output_arguments:
+      - name: result
+        type_python: "dict[str, Any]"
+    decision_log: >
+      Synchronized with MATLAB main as of 2026-04-20. GET /datasets/
+      {datasetId}/document-class-counts. Returns a flat histogram of
+      leaf data.document_class.class_name with fields datasetId,
+      totalDocuments, and classCounts (a mapping of class name to int).
+      No inheritance roll-up is performed.
+
   - name: getBulkUploadURL
     matlab_path: "+ndi/+cloud/+api/+documents/getBulkUploadURL.m"
     matlab_last_sync_hash: "9b75c0fe"
diff --git a/tests/test_cloud_api_documents.py b/tests/test_cloud_api_documents.py
new file mode 100644
index 0000000..2c5d1c8
--- /dev/null
+++ b/tests/test_cloud_api_documents.py
@@ -0,0 +1,91 @@
+"""Unit tests for ndi.cloud.api.documents — no network required."""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+
+
+def _make_client() -> MagicMock:
+    """Return a MagicMock standing in for CloudClient, with stub org_id and api_url."""
+    client = MagicMock()
+    client.config.org_id = "org-123"
+    client.config.api_url = "https://api.ndi-cloud.com/v1"
+    return client
+
+
+# --- 24-char hex helper for bulkFetch --------------------------------------
+_HEX24_A = "a" * 24
+_HEX24_B = "b" * 24
+
+
+class TestBulkFetch:
+    """bulkFetch validates inputs and POSTs to /documents/bulk-fetch."""
+
+    def test_returns_documents_list(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        client = _make_client()
+        client.post.return_value = {
+            "documents": [{"id": _HEX24_A, "name": "d1"}, {"id": _HEX24_B, "name": "d2"}]
+        }
+
+        docs = bulkFetch("ds-1", [_HEX24_A, _HEX24_B], client=client)
+
+        client.post.assert_called_once()
+        call = client.post.call_args
+        assert call.args[0] == "/datasets/{datasetId}/documents/bulk-fetch"
+        assert call.kwargs["datasetId"] == "ds-1"
+        assert call.kwargs["json"] == {"documentIds": [_HEX24_A, _HEX24_B]}
+        assert [d["name"] for d in docs] == ["d1", "d2"]
+
+    def test_empty_doc_ids_raises(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        with pytest.raises(ValueError, match="non-empty"):
+            bulkFetch("ds-1", [], client=_make_client())
+
+    def test_over_500_raises(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        ids = [_HEX24_A] * 501
+        with pytest.raises(ValueError, match="at most 500"):
+            bulkFetch("ds-1", ids, client=_make_client())
+
+    def test_non_hex_id_raises(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        with pytest.raises(ValueError, match="24-character hex"):
+            bulkFetch("ds-1", ["not-a-hex-id"], client=_make_client())
+
+    def test_missing_documents_field_returns_empty(self):
+        from ndi.cloud.api.documents import bulkFetch
+
+        client = _make_client()
+        client.post.return_value = {}
+        docs = bulkFetch("ds-1", [_HEX24_A], client=client)
+        assert docs == []
+
+
+class TestDocumentClassCounts:
+    """documentClassCounts GETs /document-class-counts and returns the struct."""
+
+    def test_returns_response_dict(self):
+        from ndi.cloud.api.documents import documentClassCounts
+
+        client = _make_client()
+        client.get.return_value = {
+            "datasetId": "ds-1",
+            "totalDocuments": 3,
+            "classCounts": {"ndi_document_probe": 2, "unknown": 1},
+        }
+
+        result = documentClassCounts("ds-1", client=client)
+
+        client.get.assert_called_once()
+        call = client.get.call_args
+        assert call.args[0] == "/datasets/{datasetId}/document-class-counts"
+        assert call.kwargs["datasetId"] == "ds-1"
+        assert result["totalDocuments"] == 3
+        assert result["classCounts"]["ndi_document_probe"] == 2
From ee3dfa96f5668d9c53c19a9849f98a32c50b033b Mon Sep 17 00:00:00 2001
From: Claude
Date: Mon, 20 Apr 2026 23:53:58 +0000
Subject: [PATCH 2/3] Use isa operator in cloud ndiquery live tests

The cloud search API no longer exposes document_class.class_name as a
directly searchable field path. Class filtering now has to go through
the 'isa' operator, which also rolls up subclasses. This was causing
test_ndiqueryAll_paginates to return zero documents against the live
server.

Only the two cloud ndiquery tests are affected. Inline document bodies
(e.g. {"document_class": {"class_name": "..."}}) and local
session.database_search calls continue to use the field directly since
they are not cloud search structures.
---
 tests/test_cloud_live.py | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/tests/test_cloud_live.py b/tests/test_cloud_live.py
index 40ddd69..33e8d0d 100644
--- a/tests/test_cloud_live.py
+++ b/tests/test_cloud_live.py
@@ -794,13 +794,9 @@ def test_ndiquery_public(self, client):
         """ndiquery should return documents matching a search."""
         from ndi.cloud.api.documents import ndiquery
 
-        search = [
-            {
-                "field": "document_class.class_name",
-                "operation": "exact_string",
-                "param1": "session",
-            }
-        ]
+        # Class filtering on the cloud must go through the 'isa' operator;
+        # the document_class.class_name field path is no longer searchable.
+        search = [{"field": "", "operation": "isa", "param1": "session"}]
         result = _retry_on_server_error(
             lambda: ndiquery("public", search, page=1, page_size=5, client=client)
         )
@@ -828,13 +824,9 @@ def test_ndiqueryAll_paginates(self, client):
         """ndiqueryAll should auto-paginate results."""
         from ndi.cloud.api.documents import ndiqueryAll
 
-        search = [
-            {
-                "field": "document_class.class_name",
-                "operation": "exact_string",
-                "param1": "session",
-            }
-        ]
+        # Class filtering on the cloud must go through the 'isa' operator;
+        # the document_class.class_name field path is no longer searchable.
+        search = [{"field": "", "operation": "isa", "param1": "session"}]
         result = _retry_on_server_error(
             lambda: ndiqueryAll("public", search, page_size=3, client=client)
         )
From 43f156b9dc6f515c0ddcdad88f13af42bc6db659 Mon Sep 17 00:00:00 2001
From: Claude
Date: Tue, 21 Apr 2026 00:00:28 +0000
Subject: [PATCH 3/3] Use ndi_query.all() in gui.py document list refresh

Replaces the regex-on-document_class.class_name idiom with the semantic
equivalent ndi_query.all(), which is a static factory for isa('base').
Matches the NDI-matlab ndi.query.all() convention and avoids relying on
the soon-to-be-removed document_class field path.
---
 src/ndi/gui/gui.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/ndi/gui/gui.py b/src/ndi/gui/gui.py
index 99b241b..1cd8456 100644
--- a/src/ndi/gui/gui.py
+++ b/src/ndi/gui/gui.py
@@ -139,9 +139,7 @@ def _update_db_list(self) -> None:
         try:
             from ndi.query import ndi_query
 
-            doc_list = self._session.database_search(
-                ndi_query("document_class.class_name", "regex", "(.*)", "")
-            )
+            doc_list = self._session.database_search(ndi_query.all())
         except Exception:
             doc_list = []
 