Skip to content

Commit ecd764f

Browse files
authored
Merge pull request #756 from superannotateai/develop
Develop
2 parents 62ebf7f + 4ada222 commit ecd764f

File tree

14 files changed

+415
-94
lines changed

14 files changed

+415
-94
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ coverage.xml
5454
*.py,cover
5555
.hypothesis/
5656
.pytest_cache/
57+
tests/tmp_test.py
5758

5859
# Translations
5960
*.mo

CHANGELOG.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,23 @@ History
66

77
All release highlights of this project will be documented in this file.
88

9+
10+
4.4.31 - Feb 27, 2025
11+
_____________________
12+
13+
**Added**
14+
15+
- Guide for Converting CSV and JSONL Formats.
16+
- New SDK Functionality Table.
17+
18+
**Updated**
19+
20+
- ``SAClient.attach_items_from_integrated_storage`` now supports Databricks integration, enabling efficient
21+
data fetching and mapping from Databricks into SuperAnnotate.
22+
923
4.4.30 - Feb 13, 2025
1024
_____________________
25+
1126
**Added**
1227

1328
- ``SAClient.list_users`` method lists contributors with optional custom field filtering.

docs/source/api_reference/api_team.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@ Team
1010
.. automethod:: superannotate.SAClient.get_user_metadata
1111
.. automethod:: superannotate.SAClient.set_user_custom_field
1212
.. automethod:: superannotate.SAClient.list_users
13+
.. automethod:: superannotate.SAClient.pause_user_activity
14+
.. automethod:: superannotate.SAClient.resume_user_activity

src/superannotate/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import sys
44

55

6-
__version__ = "4.4.31dev1"
6+
__version__ = "4.4.31dev2"
77

88
os.environ.update({"sa_version": __version__})
99
sys.path.append(os.path.split(os.path.realpath(__file__))[0])

src/superannotate/lib/app/interface/sdk_interface.py

Lines changed: 76 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,16 @@ def list_users(self, *, include: List[Literal["custom_fields"]] = None, **filter
461461
def pause_user_activity(
462462
self, pk: Union[int, str], projects: Union[List[int], List[str], Literal["*"]]
463463
):
464+
"""
465+
Block the team contributor from requesting items from the projects.
466+
467+
:param pk: The email address or user ID of the team contributor.
468+
:type pk: str or int
469+
470+
:param projects: A list of project names or IDs from which the user should be blocked.
471+
The special value "*" means block access to all projects
472+
:type projects: Union[List[int], List[str], Literal["*"]]
473+
"""
464474
user = self.controller.work_management.get_user_metadata(pk=pk)
465475
if user.role is not WMUserTypeEnum.Contributor:
466476
raise AppException("User must have a contributor role to pause activity.")
@@ -474,6 +484,16 @@ def pause_user_activity(
474484
def resume_user_activity(
475485
self, pk: Union[int, str], projects: Union[List[int], List[str], Literal["*"]]
476486
):
487+
"""
488+
Allow the team contributor to resume requesting items from the projects.
489+
490+
:param pk: The email address or user ID of the team contributor.
491+
:type pk: str or int
492+
493+
:param projects: A list of project names or IDs from which the user should be resumed.
494+
The special value "*" means resume access to all projects
495+
:type projects: Union[List[int], List[str], Literal["*"]]
496+
"""
477497
user = self.controller.work_management.get_user_metadata(pk=pk)
478498
if user.role is not WMUserTypeEnum.Contributor:
479499
raise AppException("User must have a contributor role to resume activity.")
@@ -2919,32 +2939,80 @@ def attach_items_from_integrated_storage(
29192939
project: NotEmptyStr,
29202940
integration: Union[NotEmptyStr, IntegrationEntity],
29212941
folder_path: Optional[NotEmptyStr] = None,
2942+
*,
2943+
query: Optional[NotEmptyStr] = None,
2944+
item_name_column: Optional[NotEmptyStr] = None,
2945+
custom_item_name: Optional[NotEmptyStr] = None,
2946+
component_mapping: Optional[Dict[str, str]] = None,
29222947
):
2923-
"""Link images from integrated external storage to SuperAnnotate.
2948+
"""Link images from integrated external storage (AWS, GCP, Azure, or Databricks) to SuperAnnotate.
29242949
29252950
:param project: project name or folder path where items should be attached (e.g., “project1/folder1”).
29262951
:type project: str
29272952
2928-
:param integration: existing integration name or metadata dict to pull items from.
2929-
Mandatory keys in integration metadata’s dict is “name”.
2953+
:param integration: The existing integration name or metadata dict to pull items from.
2954+
The mandatory key in the integration metadata dict is “name”.
29302955
:type integration: str or dict
29312956
29322957
:param folder_path: Points to an exact folder/directory within given storage.
2933-
If None, items are fetched from the root directory.
2958+
If None, items are fetched from the root directory.
29342959
:type folder_path: str
2960+
2961+
:param query: (Only for Databricks). The SQL query to retrieve specific columns from Databricks.
2962+
If provided, the function will execute the query and use the results for mapping and uploading.
2963+
:type query: Optional[str]
2964+
2965+
:param item_name_column: (Only for Databricks). The column name from the SQL query whose values
2966+
will be used as item names. If this is provided, custom_item_name cannot be used.
2967+
The column must exist in the query result.
2968+
:type item_name_column: Optional[str]
2969+
2970+
:param custom_item_name: (Only for Databricks). A manually defined prefix for item names.
2971+
A random 10-character suffix will be appended to ensure uniqueness.
2972+
If this is provided, item_name_column cannot be used.
2973+
:type custom_item_name: Optional[str]
2974+
2975+
:param component_mapping: (Only for Databricks). A dictionary mapping Databricks
2976+
columns to SuperAnnotate component IDs.
2977+
:type component_mapping: Optional[dict]
2978+
2979+
2980+
Request Example:
2981+
::
2982+
2983+
client.attach_items_from_integrated_storage(
2984+
project="project_name",
2985+
integration="databricks_integration",
2986+
query="SELECT * FROM integration_data LIMIT 10",
2987+
item_name_column="prompt",
2988+
component_mapping={
2989+
"category": "_item_category",
2990+
"prompt_id": "id",
2991+
"prompt": "prompt"
2992+
}
2993+
)
2994+
29352995
"""
29362996
project, folder = self.controller.get_project_folder_by_path(project)
29372997
_integration = None
29382998
if isinstance(integration, str):
29392999
integration = IntegrationEntity(name=integration)
29403000
for i in self.controller.integrations.list().data:
2941-
if integration.name == i.name:
3001+
if integration.name.lower() == i.name.lower():
29423002
_integration = i
29433003
break
29443004
else:
29453005
raise AppException("Integration not found.")
3006+
29463007
response = self.controller.integrations.attach_items(
2947-
project, folder, _integration, folder_path
3008+
project=project,
3009+
folder=folder,
3010+
integration=_integration,
3011+
folder_path=folder_path,
3012+
query=query,
3013+
item_name_column=item_name_column,
3014+
custom_item_name=custom_item_name,
3015+
component_mapping=component_mapping,
29483016
)
29493017
if response.errors:
29503018
raise AppException(response.errors)
@@ -3593,7 +3661,7 @@ def copy_items(
35933661
"skip", "replace", "replace_annotations_only"
35943662
] = "skip",
35953663
):
3596-
"""Copy images in bulk between folders in a project
3664+
"""Copy items in bulk between folders in a project
35973665
35983666
:param source: project name (root) or folder path to pick items from (e.g., “project1/folder1”).
35993667
:type source: str
@@ -3657,7 +3725,7 @@ def move_items(
36573725
"skip", "replace", "replace_annotations_only"
36583726
] = "skip",
36593727
):
3660-
"""Move images in bulk between folders in a project
3728+
"""Move items in bulk between folders in a project
36613729
36623730
:param source: project name (root) or folder path to pick items from (e.g., “project1/folder1”).
36633731
:type source: str

src/superannotate/lib/core/serviceproviders.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,7 @@ def get_upload_chunks(
490490
self,
491491
project: entities.ProjectEntity,
492492
item_ids: List[int],
493+
chunk_size: int = 1000,
493494
) -> Dict[str, List]:
494495
raise NotImplementedError
495496

@@ -592,6 +593,7 @@ def attach_items(
592593
folder: entities.FolderEntity,
593594
integration: entities.IntegrationEntity,
594595
folder_name: str = None,
596+
options: Dict[str, str] = None,
595597
) -> ServiceResponse:
596598
raise NotImplementedError
597599

Lines changed: 119 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1-
from typing import List
1+
from typing import Dict
2+
from typing import Optional
23

4+
from lib.core.conditions import Condition
5+
from lib.core.conditions import CONDITION_EQ as EQ
36
from lib.core.entities import FolderEntity
47
from lib.core.entities import IntegrationEntity
58
from lib.core.entities import ProjectEntity
9+
from lib.core.entities.integrations import IntegrationTypeEnum
10+
from lib.core.enums import ProjectType
611
from lib.core.exceptions import AppException
712
from lib.core.reporter import Reporter
813
from lib.core.response import Response
@@ -25,6 +30,11 @@ def execute(self) -> Response:
2530

2631

2732
class AttachIntegrations(BaseReportableUseCase):
33+
MULTIMODAL_INTEGRATIONS = [
34+
IntegrationTypeEnum.DATABRICKS,
35+
IntegrationTypeEnum.SNOWFLAKE,
36+
]
37+
2838
def __init__(
2939
self,
3040
reporter: Reporter,
@@ -33,46 +43,137 @@ def __init__(
3343
service_provider: BaseServiceProvider,
3444
integration: IntegrationEntity,
3545
folder_path: str = None,
46+
query: Optional[str] = None,
47+
item_name_column: Optional[str] = None,
48+
custom_item_name: Optional[str] = None,
49+
component_mapping: Optional[Dict[str, str]] = None,
3650
):
37-
3851
super().__init__(reporter)
3952
self._project = project
4053
self._folder = folder
4154
self._integration = integration
4255
self._service_provider = service_provider
4356
self._folder_path = folder_path
57+
self._query = query
58+
self._item_name_column = item_name_column
59+
self._custom_item_name = custom_item_name
60+
self._component_mapping = component_mapping
61+
self._options = {} # using only for Databricks and Snowflake
62+
self._item_category_column = None
4463

4564
@property
4665
def _upload_path(self):
4766
return f"{self._project.name}{f'/{self._folder.name}' if self._folder.name != 'root' else ''}"
4867

49-
def execute(self) -> Response:
50-
integrations: List[
51-
IntegrationEntity
52-
] = self._service_provider.integrations.list().data.integrations
53-
integration_name_lower = self._integration.name.lower()
54-
integration = next(
55-
(i for i in integrations if i.name.lower() == integration_name_lower), None
68+
def validate_integration(self):
69+
# TODO add support in next iterations
70+
if self._integration.type == IntegrationTypeEnum.SNOWFLAKE:
71+
raise AppException(
72+
"Attaching items is not supported with Snowflake integration."
73+
)
74+
75+
if self._integration.type in self.MULTIMODAL_INTEGRATIONS:
76+
if self._project.type != ProjectType.MULTIMODAL:
77+
raise AppException(
78+
f"{self._integration.name} integration is supported only for Multimodal projects."
79+
)
80+
81+
def validate_options_for_multimodal_integration(self):
82+
if self._integration.type in self.MULTIMODAL_INTEGRATIONS:
83+
if self._item_name_column and self._custom_item_name:
84+
raise AppException(
85+
"item_name_column and custom_item_name cannot be used simultaneously."
86+
)
87+
88+
if not self._item_name_column and not self._custom_item_name:
89+
raise AppException(
90+
"Either item_name_column or custom_item_name is required."
91+
)
92+
93+
if not all((self._query, self._component_mapping)):
94+
raise AppException(
95+
f"{self._integration.name} integration requires both a query and component_mapping."
96+
)
97+
98+
category_setting: bool = bool(
99+
next(
100+
(
101+
setting.value
102+
for setting in self._service_provider.projects.list_settings(
103+
self._project
104+
).data
105+
if setting.attribute == "CategorizeItems"
106+
),
107+
None,
108+
)
109+
)
110+
if (
111+
not category_setting
112+
and "_item_category" in self._component_mapping.values()
113+
):
114+
raise AppException(
115+
"Item Category must be enabled for a project to use _item_category"
116+
)
117+
118+
self._item_category_column = next(
119+
(
120+
k
121+
for k, v in self._component_mapping.items()
122+
if v == "_item_category"
123+
),
124+
None,
125+
)
126+
if self._item_category_column:
127+
del self._component_mapping[self._item_category_column]
128+
129+
sa_components = [
130+
c.name.lower()
131+
for c in self._service_provider.annotation_classes.list(
132+
condition=Condition("project_id", self._project.id, EQ)
133+
).data
134+
]
135+
136+
for i in self._component_mapping.values():
137+
if i.lower() not in sa_components:
138+
raise AppException(
139+
f"Component mapping contains invalid component ID: `{i}`"
140+
)
141+
142+
def generate_options_for_multimodal_integration(self):
143+
self._options["query"] = self._query
144+
self._options["item_name"] = (
145+
self._custom_item_name if self._custom_item_name else self._item_name_column
56146
)
57-
if integration:
147+
self._options["prefix"] = True if self._custom_item_name else False
148+
self._options["column_class_map"] = self._component_mapping
149+
if self._item_category_column:
150+
self._options["item_category"] = self._item_category_column
151+
152+
def execute(self) -> Response:
153+
if self.is_valid():
154+
if self._integration.type in self.MULTIMODAL_INTEGRATIONS:
155+
self.generate_options_for_multimodal_integration()
156+
58157
self.reporter.log_info(
59158
"Attaching file(s) from "
60-
f"{integration.root}{f'/{self._folder_path}' if self._folder_path else ''} "
159+
f"{self._integration.root}{f'/{self._folder_path}' if self._folder_path else ''} "
61160
f"to {self._upload_path}. This may take some time."
62161
)
63-
attached = self._service_provider.integrations.attach_items(
162+
163+
attache_response = self._service_provider.integrations.attach_items(
64164
project=self._project,
65165
folder=self._folder,
66-
integration=integration,
67-
folder_name=self._folder_path,
166+
integration=self._integration,
167+
folder_name=self._folder_path
168+
if self._integration.type not in self.MULTIMODAL_INTEGRATIONS
169+
else None,
170+
options=self._options if self._options else None,
68171
)
69-
if not attached:
172+
if not attache_response.ok:
70173
self._response.errors = AppException(
71174
f"An error occurred for {self._integration.name}. Please make sure: "
72175
"\n - The bucket exists."
73176
"\n - The connection is valid."
74177
"\n - The path to a specified directory is correct."
75178
)
76-
else:
77-
self._response.errors = AppException("Integration not found.")
78-
return self._response
179+
return self._response

0 commit comments

Comments
 (0)