wip: pytorch tensorflow

PJEstrada · PJEstrada · commit 04edb30d0ffb · 2021-08-24T08:18:54.000-06:00
diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py
@@ -2,6 +2,7 @@
 from ..regular.regular import refresh_from_dict
 import logging
 from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
+from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
 
 def get_directory_list(self):
 	"""
@@ -131,6 +132,15 @@ def to_pytorch(self, transform = None):
 		)
 		return pytorch_dataset
 
+	def to_tensorflow(self):
+		"""Return the files listed by all_file_ids() as a tf.data.Dataset."""
+		dataset_wrapper = DiffgramTensorflowDataset(
+			diffgram_file_id_list = self.all_file_ids(),
+			project = self.client
+		)
+		# Callers receive the dataset built by the wrapper, not the wrapper itself.
+		return dataset_wrapper.get_dataset_obj()
+
 	def new(self, name: str):
 		"""
 		Create a new directory and update directory list.
diff --git a/sdk/diffgram/core/sliced_directory.py b/sdk/diffgram/core/sliced_directory.py
@@ -1,5 +1,6 @@
 from diffgram.core.directory import Directory
 from diffgram.pytorch_diffgram.diffgram_pytorch_dataset import DiffgramPytorchDataset
+from diffgram.tensorflow_diffgram.diffgram_tensorflow_dataset import DiffgramTensorflowDataset
 
 
 class SlicedDirectory(Directory):
@@ -15,7 +16,6 @@ def all_file_ids(self):
         page_num = 1
         result = []
         while page_num is not None:
-            print('slcied query', self.query)
             diffgram_files = self.list_files(limit = 1000,
                                              page_num = page_num,
                                              file_view_mode = 'ids_only',
@@ -37,3 +37,12 @@ def to_pytorch(self, transform = None):
 
         )
         return pytorch_dataset
+
+    def to_tensorflow(self):
+        """Wrap the IDs returned by all_file_ids() in a tf.data.Dataset."""
+        tensorflow_wrapper = DiffgramTensorflowDataset(
+            diffgram_file_id_list = self.all_file_ids(),
+            project = self.client
+        )
+        # Hand back the generated dataset object rather than the wrapper.
+        return tensorflow_wrapper.get_dataset_obj()
diff --git a/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py b/sdk/diffgram/tensorflow_diffgram/diffgram_tensorflow_dataset.py
@@ -1,24 +1,23 @@
 from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator
 import os
-
+try:
+    import tensorflow as tf  # type: ignore
+except ModuleNotFoundError:
+    raise ModuleNotFoundError(
+        "'tensorflow' module should be installed to convert the Dataset into tensorflow format"
+    )
 
 class DiffgramTensorflowDataset(DiffgramDatasetIterator):
 
-    def __init__(self, project, diffgram_file_id_list = None):
+    def __init__(self, project, diffgram_file_id_list):
         """
 
         :param project (sdk.core.core.Project): A Project object from the Diffgram SDK
         :param diffgram_file_list (list): An arbitrary number of file ID's from Diffgram.
         :param transform (callable, optional): Optional transforms to be applied on a sample
         """
-        super(DiffgramDatasetIterator, self).__init__(project, diffgram_file_id_list)
-        global tf
-        try:
-            import tensorflow as tf  # type: ignore
-        except ModuleNotFoundError:
-            raise ModuleNotFoundError(
-                "'tensorflow' module should be installed to convert the Dataset into tensorflow format"
-            )
+        super(DiffgramTensorflowDataset, self).__init__(project, diffgram_file_id_list)
+
         self.diffgram_file_id_list = diffgram_file_id_list
 
         self.project = project
@@ -52,29 +51,64 @@ def __iter__(self):
         self.current_file_index = 0
         return self
 
+    def get_next_elm(self):
+        """
+        Yield a tf.train.Example for every file ID, starting from the first one.
+
+        :return: generator of tf.train.Example objects, one per entry of diffgram_file_id_list
+        """
+        # Rewind so that every call walks the complete list, not the remainder.
+        self.current_file_index = 0
+        while self.current_file_index < len(self.diffgram_file_id_list):
+            # __next__ moves current_file_index forward by one per call.
+            yield self.__next__()
+
     def __next__(self):
+        """
+        Build the tf.train.Example for the current file and advance to the next one.
+
+        :return: tf.train.Example with image metadata, image bytes and box/label lists
+        :raises StopIteration: when every file ID has already been consumed
+        """
+        if self.current_file_index >= len(self.diffgram_file_id_list):
+            raise StopIteration
         file_id = self.diffgram_file_id_list[self.current_file_index]
         diffgram_file = self.project.file.get_by_id(file_id, with_instances = True)
+        image = self.get_image_data(diffgram_file)
         instance_data = self.get_file_instances(diffgram_file)
-        filename, file_extension = os.path.splitext(instance_data['diffgram_file']['image']['original_filename'])
-        print('instance_data', instance_data)
+        filename, file_extension = os.path.splitext(instance_data['diffgram_file'].image['original_filename'])
+        label_names_bytes = [x.encode() for x in instance_data['label_name_list']]
         tf_example_dict = {
-            'image/height': self.int64_feature(instance_data['diffgram_file']['height']),
-            'image/width': self.int64_feature(instance_data['diffgram_file']['width']),
-            'image/filename': self.bytes_feature(filename),
-            'image/source_id': self.bytes_feature(filename),
-            'image/encoded': self.bytes_feature(instance_data['image']),
-            'image/format': self.bytes_feature(file_extension),
+            'image/height': self.int64_feature(instance_data['diffgram_file'].image['height']),
+            'image/width': self.int64_feature(instance_data['diffgram_file'].image['width']),
+            'image/filename': self.bytes_feature(filename.encode()),
+            'image/source_id': self.bytes_feature(filename.encode()),
+            # NOTE(review): image.tobytes() presumably gives raw array bytes, not an encoded JPEG/PNG - confirm.
+            'image/encoded': self.bytes_feature(image.tobytes()),
+            'image/format': self.bytes_feature(file_extension.encode()),
             'image/object/bbox/xmin': self.float_list_feature(instance_data['x_min_list']),
             'image/object/bbox/xmax': self.float_list_feature(instance_data['x_max_list']),
             'image/object/bbox/ymin': self.float_list_feature(instance_data['y_min_list']),
             'image/object/bbox/ymax': self.float_list_feature(instance_data['y_max_list']),
-            'image/object/class/text': self.bytes_list_feature(instance_data['label_name_list']),
+            'image/object/class/text': self.bytes_list_feature(label_names_bytes),
             'image/object/class/label': self.int64_list_feature(instance_data['label_id_list']),
         }
         tf_example = tf.train.Example(features = tf.train.Features(feature = tf_example_dict))
         self.current_file_index += 1
         return tf_example
 
     def get_dataset_obj(self):
-        return tf.data.Dataset.from_generator(self.__iter__)
+        """
+        Wrap this iterator in a tf.data.Dataset of serialized tf.train.Example strings.
+
+        :return: tf.data.Dataset whose elements are scalar tf.string tensors
+        """
+        def serialized_examples():
+            # Protobuf messages are not tensors, so hand TensorFlow their wire-format bytes.
+            for tf_example in self.get_next_elm():
+                yield tf_example.SerializeToString()
+
+        return tf.data.Dataset.from_generator(
+            serialized_examples,
+            output_signature = tf.TensorSpec(shape = (), dtype = tf.string)
+        )
diff --git a/sdk/diffgram/tensorflow_diffgram/pytorch_test.py b/sdk/diffgram/tensorflow_diffgram/pytorch_test.py
@@ -38,6 +38,8 @@ def display_masks():
 dataset = project.directory.get('Default')
 
 pytorch_dataset = dataset.to_pytorch()
+tf_dataset = dataset.to_tensorflow()
+
 
 sliced_dataset = dataset.slice(query = 'labels.sheep  > 0 or labels.sofa > 0')