
Commit 1d7b194

Author: Leo Lindén

Merge pull request #4 from superannotateai/develop

Develop

2 parents 6501c1a + a0f8911 · commit 1d7b194

17 files changed: 86 additions, 12 deletions

.github/workflows/release.yml

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI
+
+on:
+  release:
+    types: [prereleased,released]
+
+jobs:
+  build-n-publish:
+    name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
+    runs-on: ubuntu-20.04
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.8"
+    - name: Upgrade pip
+      run: >-
+        python -m
+        pip install
+        pip --upgrade
+        --user
+    - name: Install pypi/build
+      run: >-
+        python -m
+        pip install
+        build
+        --user
+    - name: Build a binary wheel and a source tarball
+      run: >-
+        python -m
+        build
+        --sdist
+        --wheel
+        --outdir dist/
+        .
+    - name: Publish distribution 📦 to PyPI
+      if: startsWith(github.ref, 'refs/tags')
+      uses: pypa/gh-action-pypi-publish@release/v1
+      with:
+        password: ${{ secrets.pypi_password }}
+        verbose: true
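
The workflow fires on prereleased and released events, builds an sdist and a wheel with python -m build, and uploads to PyPI only when the triggering ref is a tag, authenticating with the pypi_password repository secret so no credentials live in the file. A rough local equivalent of the build step, as a sketch only (it assumes the build package is already installed, which the "Install pypi/build" step takes care of in CI):

```python
# Local stand-in for the workflow's build step: produce an sdist and a wheel
# for the current project into dist/. Assumes "pip install build" has been run.
import subprocess
import sys

subprocess.run(
    [sys.executable, "-m", "build", "--sdist", "--wheel", "--outdir", "dist/", "."],
    check=True,
)
```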

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 name = "superannotate_databricks_connector" # Required
 
-version = "0.0.1dev1"
+version = "0.0.2dev1"
 
 description = "Custom functions to work with SuperAnnotate in Databricks"

readme.md

Lines changed: 8 additions & 0 deletions
@@ -34,6 +34,14 @@ If you are running the tests for the first you first have to build the base dock
 docker build -f Dockerfile.spark -t spark_docker_base .
 ```
 
+### Build package
+
+In the main directory, run the following to generate a .whl file.
+
+```bash
+python -m build
+```
+
 ### Usage
 First import the required function

src/superannotate_databricks_connector/schemas/text_schema.py

Lines changed: 0 additions & 1 deletion
@@ -42,7 +42,6 @@ def get_text_schema():
     schema = StructType([
         StructField("name", StringType(), True),
         StructField("url", StringType(), True),
-        StructField("contentLength", IntegerType(), True),
         StructField("projectId", IntegerType(), True),
         StructField("status", StringType(), True),
         StructField("annotatorEmail", StringType(), True),

src/superannotate_databricks_connector/schemas/vector_schema.py

Lines changed: 19 additions & 1 deletion
@@ -59,6 +59,23 @@ def get_vector_instance_schema():
     return instance_schema
 
 
+def get_vector_tag_schema():
+    schema = StructType([
+        StructField("instance_type", StringType(), True),
+        StructField("classId", IntegerType(), True),
+        StructField("probability", IntegerType(), True),
+        StructField("attributes", ArrayType(MapType(StringType(),
+                                                    StringType())),
+                    True),
+        StructField("createdAt", StringType(), True),
+        StructField("createdBy", MapType(StringType(), StringType()), True),
+        StructField("creationType", StringType(), True),
+        StructField("updatedAt", StringType(), True),
+        StructField("updatedBy", MapType(StringType(), StringType()), True),
+        StructField("className", StringType(), True)])
+    return schema
+
+
 def get_vector_schema():
     schema = StructType([
         StructField("image_height", IntegerType(), True),
@@ -73,6 +90,7 @@ def get_vector_schema():
         StructField("instances", ArrayType(get_vector_instance_schema()),
                     True),
         StructField("bounding_boxes", ArrayType(IntegerType()), True),
-        StructField("comments", ArrayType(get_comment_schema()), True)
+        StructField("comments", ArrayType(get_comment_schema()), True),
+        StructField("tags", ArrayType(get_vector_tag_schema()), True)
     ])
     return schema
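
The new get_vector_tag_schema lists the fields stored on tag-type instances, and get_vector_schema now surfaces them through an extra "tags" column. A minimal sketch of the schema in use; the sample row below is made up, only the schema itself comes from this change:

```python
# Build a one-row DataFrame of tag instances with the new schema.
# Requires pyspark and superannotate_databricks_connector to be installed.
from pyspark.sql import SparkSession
from superannotate_databricks_connector.schemas.vector_schema import (
    get_vector_tag_schema
)

spark = SparkSession.builder.master("local[1]").getOrCreate()

sample_tag = {  # hypothetical values, shaped to match the schema fields
    "instance_type": "tag",
    "classId": 12,
    "probability": 100,
    "attributes": [{"name": "weather", "value": "sunny"}],
    "createdAt": "2023-01-01T00:00:00.000Z",
    "createdBy": {"email": "annotator@example.com", "role": "Annotator"},
    "creationType": "Manual",
    "updatedAt": "2023-01-01T00:00:00.000Z",
    "updatedBy": {"email": "annotator@example.com", "role": "Annotator"},
    "className": "night_scene",
}

df = spark.createDataFrame([sample_tag], schema=get_vector_tag_schema())
df.show(truncate=False)
```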

src/superannotate_databricks_connector/text.py

Lines changed: 3 additions & 2 deletions
@@ -1,5 +1,7 @@
 from datetime import datetime
-from superannotate_databricks_connector.schemas.text_schema import get_text_schema
+from superannotate_databricks_connector.schemas.text_schema import (
+    get_text_schema
+)
 
 
 def convert_dates(instance):
@@ -40,7 +42,6 @@ def get_text_dataframe(annotations, spark):
         flattened_item = {
             "name": item["metadata"]["name"],
             "url": item["metadata"]["url"],
-            "contentLength": item["metadata"]["contentLength"],
             "projecId": item["metadata"]["projectId"],
             "status": item["metadata"]["status"],
             "annotatorEmail": item["metadata"]["annotatorEmail"],

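With contentLength gone from both the schema and the flattened metadata, DataFrames produced by get_text_dataframe simply lose that column. A usage sketch; the export folder and file layout are assumptions, while get_text_dataframe and its (annotations, spark) signature come from this file:

```python
# Convert locally exported SuperAnnotate text annotations into a Spark DataFrame.
# Assumes each JSON file under export/ holds one item's annotation dict with the
# metadata keys that get_text_dataframe reads.
import json
from pathlib import Path

from pyspark.sql import SparkSession
from superannotate_databricks_connector.text import get_text_dataframe

spark = SparkSession.builder.getOrCreate()

annotations = [
    json.loads(path.read_text())
    for path in Path("export/").glob("*.json")  # hypothetical export location
]

df = get_text_dataframe(annotations, spark)
df.printSchema()  # no contentLength column after this change
```
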
src/superannotate_databricks_connector/vector.py

Lines changed: 4 additions & 2 deletions
@@ -1,4 +1,6 @@
-from superannotate_databricks_connector.schemas.vector_schema import get_vector_schema
+from superannotate_databricks_connector.schemas.vector_schema import (
+    get_vector_schema
+)
 
 
 def process_comment(comment):
@@ -140,7 +142,7 @@ def get_vector_dataframe(annotations, spark, custom_id_map=None):
             'qaEmail': item["metadata"]['qaEmail'],
             "instances": [process_vector_object(instance, custom_id_map)
                           for instance in item["instances"]
-                          if instance["type"] == "object"],
+                          if instance["type"] != "tag"],
             "bounding_boxes": get_boxes(item["instances"], custom_id_map),
             "tags": [process_vector_tag(instance, custom_id_map)
                      for instance in item["instances"]
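
The filter change widens what ends up in the instances column: previously only instances whose type was "object" were kept, now every instance except tags is, while tag instances go into the tags column via process_vector_tag. An illustrative split over made-up instance dicts (not the library code itself):

```python
# After this change: anything that is not a tag stays in "instances",
# and tags get their own list, mirroring the two list comprehensions above.
items = [
    {"type": "bbox", "className": "car"},         # hypothetical non-tag instance
    {"type": "polygon", "className": "road"},     # hypothetical non-tag instance
    {"type": "tag", "className": "night_scene"},  # routed to the tags column
]

instances = [i for i in items if i["type"] != "tag"]
tags = [i for i in items if i["type"] == "tag"]

print(len(instances), len(tags))  # prints: 2 1
```
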
Binary file changed (−8 Bytes); contents not shown.
