Bugfixes. Missing instances and tags

LeoEATLinden · LeoEATLinden · commit 56eba72993fc · 2023-06-25T10:11:52.000+02:00
diff --git a/src/superannotate_databricks_connector/schemas/text_schema.py b/src/superannotate_databricks_connector/schemas/text_schema.py
@@ -42,7 +42,6 @@ def get_text_schema():
     schema = StructType([
         StructField("name", StringType(), True),
         StructField("url", StringType(), True),
-        StructField("contentLength", IntegerType(), True),
         StructField("projectId", IntegerType(), True),
         StructField("status", StringType(), True),
         StructField("annotatorEmail", StringType(), True),
diff --git a/src/superannotate_databricks_connector/schemas/vector_schema.py b/src/superannotate_databricks_connector/schemas/vector_schema.py
@@ -59,6 +59,23 @@ def get_vector_instance_schema():
     return instance_schema
 
 
+def get_vector_tag_schema():  # element type of the vector "tags" array
+    schema = StructType([  # third argument True = nullable field
+        StructField("instance_type", StringType(), True),
+        StructField("classId", IntegerType(), True),
+        StructField("probability", IntegerType(), True),  # TODO: confirm int
+        StructField("attributes", ArrayType(MapType(StringType(),
+                                                    StringType())),
+                    True),  # array of string -> string maps
+        StructField("createdAt", StringType(), True),  # typed as string
+        StructField("createdBy", MapType(StringType(), StringType()), True),
+        StructField("creationType", StringType(), True),
+        StructField("updatedAt", StringType(), True),  # typed as string
+        StructField("updatedBy", MapType(StringType(), StringType()), True),
+        StructField("className", StringType(), True)])
+    return schema
+
+
 def get_vector_schema():
     schema = StructType([
         StructField("image_height", IntegerType(), True),
@@ -73,6 +90,7 @@ def get_vector_schema():
         StructField("instances", ArrayType(get_vector_instance_schema()),
                     True),
         StructField("bounding_boxes", ArrayType(IntegerType()), True),
-        StructField("comments", ArrayType(get_comment_schema()), True)
+        StructField("comments", ArrayType(get_comment_schema()), True),
+        StructField("tags", ArrayType(get_vector_tag_schema()), True)
     ])
     return schema
diff --git a/src/superannotate_databricks_connector/text.py b/src/superannotate_databricks_connector/text.py
@@ -1,5 +1,7 @@
 from datetime import datetime
-from superannotate_databricks_connector.schemas.text_schema import get_text_schema
+from superannotate_databricks_connector.schemas.text_schema import (
+    get_text_schema
+)
 
 
 def convert_dates(instance):
@@ -40,7 +42,6 @@ def get_text_dataframe(annotations, spark):
         flattened_item = {
             "name": item["metadata"]["name"],
             "url": item["metadata"]["url"],
-            "contentLength": item["metadata"]["contentLength"],
             "projecId": item["metadata"]["projectId"],
             "status": item["metadata"]["status"],
             "annotatorEmail": item["metadata"]["annotatorEmail"],
diff --git a/src/superannotate_databricks_connector/vector.py b/src/superannotate_databricks_connector/vector.py
@@ -1,4 +1,6 @@
-from superannotate_databricks_connector.schemas.vector_schema import get_vector_schema
+from superannotate_databricks_connector.schemas.vector_schema import (
+    get_vector_schema
+)
 
 
 def process_comment(comment):
diff --git a/tests/test_vector.py b/tests/test_vector.py
@@ -17,11 +17,13 @@
 class TestVectorInstances(unittest.TestCase):
     def __init__(self, *args):
         super().__init__(*args)
-        with open(os.path.join(DATA_SET_PATH, "vector/example_annotation.json"), "r") as f:
+        with open(os.path.join(DATA_SET_PATH,
+                               "vector/example_annotation.json"), "r") as f:
             data = json.load(f)
 
         target_data = []
-        with open(os.path.join(DATA_SET_PATH, 'vector/expected_instances.json'),"r") as f:
+        with open(os.path.join(DATA_SET_PATH,
+                               'vector/expected_instances.json'), "r") as f:
             for line in f:
                 target_data.append(json.loads(line))
 
@@ -96,20 +98,22 @@ def test_get_boxes(self):
                          "y1": 2.1,
                          "y2": 18.9
                      },
-                      "classId": 10229}]
+            "classId": 10229}]
         target = [2, 1, 13, 22, 10228, 3, 2, 4, 19, 10229]
         self.assertEqual(get_boxes(instances), target)
 
 
 class TestVectorDataFrame(unittest.TestCase):
     def test_vector_dataframe(self):
         spark = SparkSession.builder.master("local").getOrCreate()
-        with open(os.path.join(DATA_SET_PATH, "vector/example_annotation.json"),"r") as f:
+        with open(os.path.join(DATA_SET_PATH,
+                               "vector/example_annotation.json"), "r") as f:
             data = json.load(f)
 
         actual_df = get_vector_dataframe([data], spark)
 
-        expected_df = spark.read.parquet(os.path.join(DATA_SET_PATH, "vector/expected_df.parquet"))
+        expected_df = spark.read.parquet(os.path.join(
+            DATA_SET_PATH, "vector/expected_df.parquet"))
         self.assertEqual(sorted(actual_df.collect()),
                          sorted(expected_df.collect()))