From 409768765c7ce136bb4ada2dbc710a1142d67ae4 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 23 Jul 2025 09:12:18 +0200 Subject: [PATCH 01/17] datatypes attribute text list --- business_objects/embedding.py | 5 ++++- business_objects/record.py | 2 +- enums.py | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/business_objects/embedding.py b/business_objects/embedding.py index 98068ac..93ba235 100644 --- a/business_objects/embedding.py +++ b/business_objects/embedding.py @@ -323,7 +323,10 @@ def __build_payload_selector( and data_type != enums.DataTypes.PERMISSION.value ): payload_selector += f"'{attr}', (r.\"data\"->>'{attr}')::{data_type}" - elif data_type == enums.DataTypes.PERMISSION.value: + elif ( + data_type == enums.DataTypes.PERMISSION.value + or data_type == enums.DataTypes.TEXT_LIST.value + ): payload_selector += f"'{attr}', r.\"data\"->'{attr}'" else: payload_selector += f"'{attr}', r.\"data\"->>'{attr}'" diff --git a/business_objects/record.py b/business_objects/record.py index 3657229..637f38b 100644 --- a/business_objects/record.py +++ b/business_objects/record.py @@ -943,7 +943,7 @@ def get_first_no_text_column(project_id: str, record_id: str) -> str: ( SELECT a.name FROM attribute a - WHERE data_type NOT IN('{enums.DataTypes.TEXT.value}' , '{enums.DataTypes.CATEGORY.value}', '{enums.DataTypes.LLM_RESPONSE.value}') + WHERE data_type NOT IN('{enums.DataTypes.TEXT.value}' , '{enums.DataTypes.CATEGORY.value}', '{enums.DataTypes.LLM_RESPONSE.value}', {enums.DataTypes.TEXT_LIST.value}') AND a.state IN ('{enums.AttributeState.AUTOMATICALLY_CREATED.value}','{enums.AttributeState.UPLOADED.value}','{enums.AttributeState.USABLE.value}') AND a.project_id = '{project_id}' ORDER BY a.relative_position diff --git a/enums.py b/enums.py index 388674a..69557b8 100644 --- a/enums.py +++ b/enums.py @@ -11,6 +11,7 @@ class DataTypes(Enum): LLM_RESPONSE = "LLM_RESPONSE" EMBEDDING_LIST = "EMBEDDING_LIST" # only for embeddings & default hidden PERMISSION = "PERMISSION" # used for access control + TEXT_LIST = "TEXT_LIST" UNKNOWN = "UNKNOWN" From 5e23e9828ba9f57124cc13ddd41f92258b64f412 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 23 Jul 2025 09:14:11 +0200 Subject: [PATCH 02/17] payload selector --- business_objects/embedding.py | 1 + 1 file changed, 1 insertion(+) diff --git a/business_objects/embedding.py b/business_objects/embedding.py index 93ba235..c4c7674 100644 --- a/business_objects/embedding.py +++ b/business_objects/embedding.py @@ -321,6 +321,7 @@ def __build_payload_selector( data_type != enums.DataTypes.TEXT.value and data_type != enums.DataTypes.LLM_RESPONSE.value and data_type != enums.DataTypes.PERMISSION.value + and data_type != enums.DataTypes.TEXT_LIST.value ): payload_selector += f"'{attr}', (r.\"data\"->>'{attr}')::{data_type}" elif ( From f14df6771e4dd02ac062974b3ccd024bceeda63c Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 29 Jul 2025 14:34:53 +0200 Subject: [PATCH 03/17] IntegrationSharepointPropertySync --- enums.py | 1 + models.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/enums.py b/enums.py index 69557b8..0f63a24 100644 --- a/enums.py +++ b/enums.py @@ -169,6 +169,7 @@ class Tablenames(Enum): INTEGRATION_PDF = "pdf" INTEGRATION_SHAREPOINT = "sharepoint" STEP_TEMPLATES = "step_templates" # templates for strategy steps + INTEGRATION_SHAREPOINT_PROPERTY_SYNC = "sharepoint_property_sync" def snake_case_to_pascal_case(self): # the type name (written in PascalCase) of a table is needed to create backrefs diff --git a/models.py b/models.py index 57c891f..3e4bf02 100644 --- a/models.py +++ b/models.py @@ -2388,3 +2388,23 @@ class IntegrationSharepoint(Base): hashes = Column(JSON) permissions = Column(JSON) file_properties = Column(JSON) + + +class IntegrationSharepointPropertySync(Base): + __tablename__ = Tablenames.INTEGRATION_SHAREPOINT_PROPERTY_SYNC.value + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + created_by = Column( + UUID(as_uuid=True), + ForeignKey(f"{Tablenames.USER.value}.id", ondelete="SET NULL"), + index=True, + ) + created_at = Column(DateTime, default=sql.func.now()) + updated_at = Column(DateTime, onupdate=sql.func.now()) + integration_id = Column( + UUID(as_uuid=True), + ForeignKey(f"cognition.{Tablenames.INTEGRATION.value}.id", ondelete="CASCADE"), + index=True, + ) + config = Column(JSON) # JSON object containing the rules for property sync + logs = Column(ARRAY(String)) + state = Column(String) From 05aac2a575182235405dd084a028a1294d2435ad Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 29 Jul 2025 16:21:32 +0200 Subject: [PATCH 04/17] schema --- models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/models.py b/models.py index 3e4bf02..78a54a4 100644 --- a/models.py +++ b/models.py @@ -2392,6 +2392,7 @@ class IntegrationSharepoint(Base): class IntegrationSharepointPropertySync(Base): __tablename__ = Tablenames.INTEGRATION_SHAREPOINT_PROPERTY_SYNC.value + __table_args__ = (UniqueConstraint("integration_id"), {"schema": "integration"}) id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) created_by = Column( UUID(as_uuid=True), From 4660d9db970cad45c0d560823f49327e15b697f2 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Tue, 29 Jul 2025 16:22:58 +0200 Subject: [PATCH 05/17] get update sync properties --- .../integration_sharepoint_property_sync.py | 32 +++++++++++++ .../integration_sharepoint_property_sync.py | 48 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 cognition_objects/integration_sharepoint_property_sync.py create mode 100644 integration_objects/integration_sharepoint_property_sync.py diff --git a/cognition_objects/integration_sharepoint_property_sync.py b/cognition_objects/integration_sharepoint_property_sync.py new file mode 100644 index 0000000..42a9be0 --- /dev/null +++ b/cognition_objects/integration_sharepoint_property_sync.py @@ -0,0 +1,32 @@ +from models import IntegrationSharepointPropertySync +from typing import List, Optional, Dict, Any +from sqlalchemy.orm.attributes import flag_modified +from ..business_objects import general +from ..session import session + + +def get_by_integration_id( + integration_id: str, +) -> List[IntegrationSharepointPropertySync]: + return ( + session.query(IntegrationSharepointPropertySync) + .filter(IntegrationSharepointPropertySync.integration_id == integration_id) + .first() + ) + + +def update( + integration_id: str, + config: Optional[Dict[str, Any]] = None, + logs: Optional[List[Dict[str, Any]]] = None, + with_commit: bool = True, +) -> IntegrationSharepointPropertySync: + integration_sync = get_by_integration_id(integration_id) + if config is not None: + integration_sync.config = config + flag_modified(integration_sync, "config") + if logs is not None: + integration_sync.logs = logs + flag_modified(integration_sync, "logs") + general.flush_or_commit(with_commit) + return integration_sync diff --git a/integration_objects/integration_sharepoint_property_sync.py b/integration_objects/integration_sharepoint_property_sync.py new file mode 100644 index 0000000..a0f0b3f --- /dev/null +++ b/integration_objects/integration_sharepoint_property_sync.py @@ -0,0 +1,48 @@ +from models import IntegrationSharepointPropertySync +from typing import List, Optional, Dict, Any +from sqlalchemy.orm.attributes import flag_modified +from ..business_objects import general +from ..session import session + + +def get_by_integration_id( + integration_id: str, +) -> List[IntegrationSharepointPropertySync]: + return ( + session.query(IntegrationSharepointPropertySync) + .filter(IntegrationSharepointPropertySync.integration_id == integration_id) + .first() + ) + + +def create( + integration_id: str, + config: Optional[Dict[str, Any]] = None, + logs: Optional[List[Dict[str, Any]]] = None, + with_commit: bool = True, +) -> IntegrationSharepointPropertySync: + integration_sync = IntegrationSharepointPropertySync( + integration_id=integration_id, + config=config or {}, + logs=logs or [], + ) + session.add(integration_sync) + general.flush_or_commit(with_commit) + return integration_sync + + +def update( + integration_id: str, + config: Optional[Dict[str, Any]] = None, + logs: Optional[List[Dict[str, Any]]] = None, + with_commit: bool = True, +) -> IntegrationSharepointPropertySync: + integration_sync = get_by_integration_id(integration_id) + if config is not None: + integration_sync.config = config + flag_modified(integration_sync, "config") + if logs is not None: + integration_sync.logs = logs + flag_modified(integration_sync, "logs") + general.flush_or_commit(with_commit) + return integration_sync From 54de49b1dcab32b9014b84ca479bb43c9d71c2ba Mon Sep 17 00:00:00 2001 From: Lina Date: Wed, 30 Jul 2025 13:46:33 +0200 Subject: [PATCH 06/17] Syntax fix --- business_objects/record.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/business_objects/record.py b/business_objects/record.py index 637f38b..a67a0f2 100644 --- a/business_objects/record.py +++ b/business_objects/record.py @@ -943,7 +943,7 @@ def get_first_no_text_column(project_id: str, record_id: str) -> str: ( SELECT a.name FROM attribute a - WHERE data_type NOT IN('{enums.DataTypes.TEXT.value}' , '{enums.DataTypes.CATEGORY.value}', '{enums.DataTypes.LLM_RESPONSE.value}', {enums.DataTypes.TEXT_LIST.value}') + WHERE data_type NOT IN('{enums.DataTypes.TEXT.value}', '{enums.DataTypes.CATEGORY.value}', '{enums.DataTypes.LLM_RESPONSE.value}', '{enums.DataTypes.TEXT_LIST.value}') AND a.state IN ('{enums.AttributeState.AUTOMATICALLY_CREATED.value}','{enums.AttributeState.UPLOADED.value}','{enums.AttributeState.USABLE.value}') AND a.project_id = '{project_id}' ORDER BY a.relative_position From 6af406272366b45c3a089e4f31701e99318f664f Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 30 Jul 2025 16:04:13 +0200 Subject: [PATCH 07/17] import fix --- integration_objects/integration_sharepoint_property_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_objects/integration_sharepoint_property_sync.py b/integration_objects/integration_sharepoint_property_sync.py index a0f0b3f..1824c5c 100644 --- a/integration_objects/integration_sharepoint_property_sync.py +++ b/integration_objects/integration_sharepoint_property_sync.py @@ -1,4 +1,4 @@ -from models import IntegrationSharepointPropertySync +from submodules.model import IntegrationSharepointPropertySync from typing import List, Optional, Dict, Any from sqlalchemy.orm.attributes import flag_modified from ..business_objects import general From 9e32bfc8c66f0691e6c141372894e9e362d4ef79 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Wed, 30 Jul 2025 18:55:25 +0200 Subject: [PATCH 08/17] proeprt sync state --- enums.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/enums.py b/enums.py index 0f63a24..d103277 100644 --- a/enums.py +++ b/enums.py @@ -931,3 +931,10 @@ def from_string(value: str): raise KeyError( f"Could not parse CognitionIntegrationType from string '{changed_value}'" ) + + +class SharepointPropertySyncState(Enum): + CREATED = "CREATED" + RUNNING = "RUNNING" + COMPLETED = "COMPLETED" + FAILED = "FAILED" From 3bd166df5a41398859e37665b4ea8152a4ec88da Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 31 Jul 2025 17:43:25 +0200 Subject: [PATCH 09/17] chore: add extension TODO comment --- integration_objects/manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integration_objects/manager.py b/integration_objects/manager.py index 7979678..ea2a53f 100644 --- a/integration_objects/manager.py +++ b/integration_objects/manager.py @@ -92,6 +92,8 @@ def get_existing_integration_records( integration_id: str, by: str = "source", ) -> Dict[str, object]: + # TODO(extension): make return type Dict[str, List[object]] + # once an object_id can reference multiple different integration records return { getattr(record, by, record.source): record for record in get_all_by_integration_id(IntegrationModel, integration_id) From 9d797fea0994083122bfe8e8ed7082e0fc740737 Mon Sep 17 00:00:00 2001 From: LennartSchmidtKern Date: Fri, 1 Aug 2025 20:22:44 +0200 Subject: [PATCH 10/17] get by running ids --- business_objects/record.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/business_objects/record.py b/business_objects/record.py index a67a0f2..c8ebd02 100644 --- a/business_objects/record.py +++ b/business_objects/record.py @@ -968,3 +968,16 @@ def get_record_ids_by_running_ids(project_id: str, running_ids: List[int]) -> Li .all() ) ] + + +def get_records_by_running_ids(project_id: str, running_ids: List[int]) -> List[str]: + return ( + session.query(Record) + .filter( + Record.project_id == project_id, + Record.data[attribute.get_running_id_name(project_id)] + .as_integer() + .in_(running_ids), + ) + .all() + ) From 468d73881f240ab7a26523ab7211fa4e5dc4160a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 2 Sep 2025 08:40:45 +0200 Subject: [PATCH 11/17] perf: move sharepoint property sync into cognition objects --- .../integration_sharepoint_property_sync.py | 48 ------------------- 1 file changed, 48 deletions(-) delete mode 100644 integration_objects/integration_sharepoint_property_sync.py diff --git a/integration_objects/integration_sharepoint_property_sync.py b/integration_objects/integration_sharepoint_property_sync.py deleted file mode 100644 index 1824c5c..0000000 --- a/integration_objects/integration_sharepoint_property_sync.py +++ /dev/null @@ -1,48 +0,0 @@ -from submodules.model import IntegrationSharepointPropertySync -from typing import List, Optional, Dict, Any -from sqlalchemy.orm.attributes import flag_modified -from ..business_objects import general -from ..session import session - - -def get_by_integration_id( - integration_id: str, -) -> List[IntegrationSharepointPropertySync]: - return ( - session.query(IntegrationSharepointPropertySync) - .filter(IntegrationSharepointPropertySync.integration_id == integration_id) - .first() - ) - - -def create( - integration_id: str, - config: Optional[Dict[str, Any]] = None, - logs: Optional[List[Dict[str, Any]]] = None, - with_commit: bool = True, -) -> IntegrationSharepointPropertySync: - integration_sync = IntegrationSharepointPropertySync( - integration_id=integration_id, - config=config or {}, - logs=logs or [], - ) - session.add(integration_sync) - general.flush_or_commit(with_commit) - return integration_sync - - -def update( - integration_id: str, - config: Optional[Dict[str, Any]] = None, - logs: Optional[List[Dict[str, Any]]] = None, - with_commit: bool = True, -) -> IntegrationSharepointPropertySync: - integration_sync = get_by_integration_id(integration_id) - if config is not None: - integration_sync.config = config - flag_modified(integration_sync, "config") - if logs is not None: - integration_sync.logs = logs - flag_modified(integration_sync, "logs") - general.flush_or_commit(with_commit) - return integration_sync From 4976aa434372aa4dd29dbe08ff31c687d88a28c7 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 2 Sep 2025 08:55:40 +0200 Subject: [PATCH 12/17] perf: move create function into cognition objects --- .../integration_sharepoint_property_sync.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cognition_objects/integration_sharepoint_property_sync.py b/cognition_objects/integration_sharepoint_property_sync.py index 42a9be0..a0f0b3f 100644 --- a/cognition_objects/integration_sharepoint_property_sync.py +++ b/cognition_objects/integration_sharepoint_property_sync.py @@ -15,6 +15,22 @@ def get_by_integration_id( ) +def create( + integration_id: str, + config: Optional[Dict[str, Any]] = None, + logs: Optional[List[Dict[str, Any]]] = None, + with_commit: bool = True, +) -> IntegrationSharepointPropertySync: + integration_sync = IntegrationSharepointPropertySync( + integration_id=integration_id, + config=config or {}, + logs=logs or [], + ) + session.add(integration_sync) + general.flush_or_commit(with_commit) + return integration_sync + + def update( integration_id: str, config: Optional[Dict[str, Any]] = None, From 6fbeb604513ba7a72c622c2913c273aa2d0dec2e Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Tue, 2 Sep 2025 16:29:03 +0200 Subject: [PATCH 13/17] perf: SYNC_SHAREPOINT_FILE_PROPERTIES task --- cognition_objects/integration_sharepoint_property_sync.py | 2 +- enums.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cognition_objects/integration_sharepoint_property_sync.py b/cognition_objects/integration_sharepoint_property_sync.py index a0f0b3f..20e73a6 100644 --- a/cognition_objects/integration_sharepoint_property_sync.py +++ b/cognition_objects/integration_sharepoint_property_sync.py @@ -1,8 +1,8 @@ -from models import IntegrationSharepointPropertySync from typing import List, Optional, Dict, Any from sqlalchemy.orm.attributes import flag_modified from ..business_objects import general from ..session import session +from ..models import IntegrationSharepointPropertySync def get_by_integration_id( diff --git a/enums.py b/enums.py index f361fcd..5ac4efd 100644 --- a/enums.py +++ b/enums.py @@ -514,6 +514,7 @@ class TaskType(Enum): RUN_COGNITION_MACRO = "RUN_COGNITION_MACRO" PARSE_COGNITION_FILE = "PARSE_COGNITION_FILE" EXECUTE_INTEGRATION = "EXECUTE_INTEGRATION" + SYNC_SHAREPOINT_FILE_PROPERTIES = "SYNC_SHAREPOINT_FILE_PROPERTIES" class TaskQueueAction(Enum): From 0f4cf24e4f7818a65a20d4f675f22cd47b8ba198 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Wed, 3 Sep 2025 08:24:55 +0200 Subject: [PATCH 14/17] perf: add sync_finished for task-master --- .../integration_sharepoint_property_sync.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/cognition_objects/integration_sharepoint_property_sync.py b/cognition_objects/integration_sharepoint_property_sync.py index 20e73a6..970dc7f 100644 --- a/cognition_objects/integration_sharepoint_property_sync.py +++ b/cognition_objects/integration_sharepoint_property_sync.py @@ -1,13 +1,20 @@ from typing import List, Optional, Dict, Any from sqlalchemy.orm.attributes import flag_modified +from ..models import IntegrationSharepointPropertySync +from ..enums import SharepointPropertySyncState from ..business_objects import general from ..session import session -from ..models import IntegrationSharepointPropertySync + + +FINISHED_STATES = [ + SharepointPropertySyncState.COMPLETED.value, + SharepointPropertySyncState.FAILED.value, +] def get_by_integration_id( integration_id: str, -) -> List[IntegrationSharepointPropertySync]: +) -> IntegrationSharepointPropertySync: return ( session.query(IntegrationSharepointPropertySync) .filter(IntegrationSharepointPropertySync.integration_id == integration_id) @@ -46,3 +53,10 @@ def update( flag_modified(integration_sync, "logs") general.flush_or_commit(with_commit) return integration_sync + + +def sync_finished(integration_id: str) -> bool: + integration_sync = get_by_integration_id(integration_id) + if integration_sync is None: + return True + return integration_sync.state in FINISHED_STATES From 5b48fbd2c603a40044cd3c2cf85678845db99bc4 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 4 Sep 2025 09:17:04 +0200 Subject: [PATCH 15/17] perf: remove extra task type --- enums.py | 1 - 1 file changed, 1 deletion(-) diff --git a/enums.py b/enums.py index 5ac4efd..f361fcd 100644 --- a/enums.py +++ b/enums.py @@ -514,7 +514,6 @@ class TaskType(Enum): RUN_COGNITION_MACRO = "RUN_COGNITION_MACRO" PARSE_COGNITION_FILE = "PARSE_COGNITION_FILE" EXECUTE_INTEGRATION = "EXECUTE_INTEGRATION" - SYNC_SHAREPOINT_FILE_PROPERTIES = "SYNC_SHAREPOINT_FILE_PROPERTIES" class TaskQueueAction(Enum): From 8a02600159fe8f3e19ca718de671a73d98a8939a Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 4 Sep 2025 09:18:18 +0200 Subject: [PATCH 16/17] perf: remove extra task type --- cognition_objects/integration_sharepoint_property_sync.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cognition_objects/integration_sharepoint_property_sync.py b/cognition_objects/integration_sharepoint_property_sync.py index 970dc7f..8a67c68 100644 --- a/cognition_objects/integration_sharepoint_property_sync.py +++ b/cognition_objects/integration_sharepoint_property_sync.py @@ -53,10 +53,3 @@ def update( flag_modified(integration_sync, "logs") general.flush_or_commit(with_commit) return integration_sync - - -def sync_finished(integration_id: str) -> bool: - integration_sync = get_by_integration_id(integration_id) - if integration_sync is None: - return True - return integration_sync.state in FINISHED_STATES From c18e6217619d6ce06760d3e3d0f6b8d799cbecb5 Mon Sep 17 00:00:00 2001 From: andhreljaKern Date: Thu, 4 Sep 2025 15:31:28 +0200 Subject: [PATCH 17/17] perf: remove unused FINISHED_STATES --- cognition_objects/integration_sharepoint_property_sync.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cognition_objects/integration_sharepoint_property_sync.py b/cognition_objects/integration_sharepoint_property_sync.py index 8a67c68..81afbd3 100644 --- a/cognition_objects/integration_sharepoint_property_sync.py +++ b/cognition_objects/integration_sharepoint_property_sync.py @@ -6,12 +6,6 @@ from ..session import session -FINISHED_STATES = [ - SharepointPropertySyncState.COMPLETED.value, - SharepointPropertySyncState.FAILED.value, -] - - def get_by_integration_id( integration_id: str, ) -> IntegrationSharepointPropertySync: