diff --git a/generate_thresholds.py b/generate_thresholds.py
index 47ff87a..f69e9f5 100644
--- a/generate_thresholds.py
+++ b/generate_thresholds.py
@@ -13,7 +13,7 @@
 from sklearn.metrics import precision_recall_curve
 import warnings
 from lib.model_taxonomy_dataframe import ModelTaxonomyDataframe
-from lib.tf_gp_elev_model import TFGeoPriorModelElev
+from lib.geo_inferrer_tf import TFGeoPriorModelElev
 
 
 def ignore_shapely_deprecation_warning(message, category, filename, lineno, file=None, line=None):
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/geo_inferrer.py b/lib/geo_inferrer.py
new file mode 100644
index 0000000..4caca4c
--- /dev/null
+++ b/lib/geo_inferrer.py
@@ -0,0 +1,48 @@
+from abc import ABC, abstractmethod
+import math
+
+import numpy as np
+import tensorflow as tf
+
+
+class GeoInferrer(ABC):
+    @abstractmethod
+    def __init__(self, model_path: str):
+        """Subclasses must implement this constructor."""
+        pass
+
+    @abstractmethod
+    def predict(
+        self, latitude: float, longitude: float, elevation: float
+    ) -> np.ndarray:
+        """
+        given a location, calculate geo results
+
+        Subclasses must implement this method.
+        """
+        pass
+
+    @staticmethod
+    def encode_loc(latitude, longitude, elevation):
+        latitude = np.array(latitude)
+        longitude = np.array(longitude)
+        elevation = np.array(elevation)
+        elevation = elevation.astype("float32")
+        grid_lon = longitude.astype("float32") / 180.0
+        grid_lat = latitude.astype("float32") / 90.0
+
+        elevation[elevation > 0] = elevation[elevation > 0] / 6574.0
+        elevation[elevation < 0] = elevation[elevation < 0] / 32768.0
+        norm_elev = elevation
+
+        norm_loc = tf.stack([grid_lon, grid_lat], axis=1)
+
+        encoded_loc = tf.concat(
+            [
+                tf.sin(norm_loc * math.pi),
+                tf.cos(norm_loc * math.pi),
+                tf.expand_dims(norm_elev, axis=1),
+            ],
+            axis=1,
+        )
+        return encoded_loc
diff --git a/lib/geo_inferrer_coreml.py b/lib/geo_inferrer_coreml.py
new file mode 100644
index 0000000..4b8dcd5
--- /dev/null
+++ b/lib/geo_inferrer_coreml.py
@@ -0,0 +1,21 @@
+import coremltools as ct
+import numpy as np
+
+from lib.geo_inferrer import GeoInferrer
+
+
+class CoremlGeoPriorModelElev(GeoInferrer):
+
+    def __init__(self, model_path: str):
+        self.model_path = model_path
+        self.gpmodel = ct.models.MLModel(self.model_path)
+
+    def predict(
+        self, latitude: float, longitude: float, elevation: float
+    ) -> np.ndarray:
+        encoded_loc = GeoInferrer.encode_loc(
+            [latitude], [longitude], [elevation]
+        ).numpy()
+        out_dict = self.gpmodel.predict({"input_1": encoded_loc})
+        preds = out_dict["Identity"][0]
+        return preds
diff --git a/lib/geo_inferrer_factory.py b/lib/geo_inferrer_factory.py
new file mode 100644
index 0000000..052a6fd
--- /dev/null
+++ b/lib/geo_inferrer_factory.py
@@ -0,0 +1,20 @@
+from sys import platform
+
+from lib.geo_inferrer import GeoInferrer
+from lib.geo_inferrer_coreml import CoremlGeoPriorModelElev
+from lib.geo_inferrer_tflite import TFLiteGeoPriorModelElev
+from lib.geo_inferrer_tf import TFGeoPriorModelElev
+
+
+class GeoInferrerFactory:
+    @staticmethod
+    def create(model_path: str) -> GeoInferrer:
+        if "mlmodel" in model_path:
+            assert platform == "darwin", "CoreML models can only be used on macOS"
+            return CoremlGeoPriorModelElev(model_path)
+        elif "tflite" in model_path:
+            return TFLiteGeoPriorModelElev(model_path)
+        elif "h5" in model_path:
+            return TFGeoPriorModelElev(model_path)
+        else:
+            raise ValueError(f"Unsupported model format in path: {model_path}")
diff --git a/lib/tf_gp_elev_model.py b/lib/geo_inferrer_tf.py
similarity index 62%
rename from lib/tf_gp_elev_model.py
rename to lib/geo_inferrer_tf.py
index e086d59..79c5a1c 100644
--- a/lib/tf_gp_elev_model.py
+++ b/lib/geo_inferrer_tf.py
@@ -1,15 +1,17 @@
+import os
+
 import tensorflow as tf
 import numpy as np
-import math
-import os
+
 from lib.res_layer import ResLayer
+from lib.geo_inferrer import GeoInferrer
 
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 
 
-class TFGeoPriorModelElev:
+class TFGeoPriorModelElev(GeoInferrer):
 
-    def __init__(self, model_path):
+    def __init__(self, model_path: str):
         # initialize the geo model for inference
         tf.config.set_visible_devices([], "GPU")
         visible_devices = tf.config.get_visible_devices()
@@ -17,15 +19,16 @@ def __init__(self, model_path):
             assert device.device_type != "GPU"
         self.gpmodel = tf.keras.models.load_model(
            model_path,
-            custom_objects={"ResLayer": ResLayer},
+            custom_objects={"ResLayer": ResLayer}, compile=False
         )
 
-    def predict(self, latitude, longitude, elevation):
-        encoded_loc = TFGeoPriorModelElev.encode_loc([latitude], [longitude], [elevation])
-        return self.gpmodel(tf.convert_to_tensor(
+    def predict(self, latitude: float, longitude: float, elevation: float) -> np.ndarray:
+        encoded_loc = GeoInferrer.encode_loc([latitude], [longitude], [elevation])
+        output = self.gpmodel(tf.convert_to_tensor(
             tf.expand_dims(encoded_loc[0], axis=0)
         ), training=False)[0]
+        return output
 
     def features_for_one_class_elevation(self, latitude, longitude, elevation):
         """Evalutes the model for a single class and multiple locations
@@ -60,26 +63,3 @@ def eval_one_class_elevation_from_features(self, features, class_of_interest):
                 transpose_b=True
             )
         ).numpy()
-
-    @staticmethod
-    def encode_loc(latitude, longitude, elevation):
-        latitude = np.array(latitude)
-        longitude = np.array(longitude)
-        elevation = np.array(elevation)
-        elevation = elevation.astype("float32")
-        grid_lon = longitude.astype("float32") / 180.0
-        grid_lat = latitude.astype("float32") / 90.0
-
-        elevation[elevation > 0] = elevation[elevation > 0] / 6574.0
-        elevation[elevation < 0] = elevation[elevation < 0] / 32768.0
-        norm_elev = elevation
-
-        norm_loc = tf.stack([grid_lon, grid_lat], axis=1)
-
-        encoded_loc = tf.concat([
-            tf.sin(norm_loc * math.pi),
-            tf.cos(norm_loc * math.pi),
-            tf.expand_dims(norm_elev, axis=1),
-
-        ], axis=1)
-        return encoded_loc
diff --git a/lib/geo_inferrer_tflite.py b/lib/geo_inferrer_tflite.py
new file mode 100644
index 0000000..842a995
--- /dev/null
+++ b/lib/geo_inferrer_tflite.py
@@ -0,0 +1,33 @@
+import numpy as np
+import tensorflow as tf
+
+from lib.geo_inferrer import GeoInferrer
+
+
+class TFLiteGeoPriorModelElev(GeoInferrer):
+
+    def __init__(self, model_path: str):
+        self.model_path = model_path
+        self.interpreter = tf.lite.Interpreter(model_path=self.model_path)
+        self.interpreter.allocate_tensors()
+
+    def predict(
+        self, latitude: float, longitude: float, elevation: float
+    ) -> np.ndarray:
+        encoded_loc = GeoInferrer.encode_loc(
+            [latitude], [longitude], [elevation]
+        ).numpy()
+
+        input_details = self.interpreter.get_input_details()
+        output_details = self.interpreter.get_output_details()
+
+        input_dtype = input_details[0]["dtype"]
+        encoded_loc = encoded_loc.astype(input_dtype)
+
+        self.interpreter.set_tensor(
+            input_details[0]["index"],
+            encoded_loc,
+        )
+        self.interpreter.invoke()
+        output_data = self.interpreter.get_tensor(output_details[0]["index"])
+        return output_data[0]
diff --git a/lib/inat_inferrer.py b/lib/inat_inferrer.py
index 73107b8..4aa2039 100644
--- a/lib/inat_inferrer.py
+++ b/lib/inat_inferrer.py
@@ -16,8 +16,8 @@
 import asyncio
 from PIL import Image
 
-from lib.tf_gp_elev_model import TFGeoPriorModelElev
-from lib.vision_inferrer import VisionInferrer
+from lib.geo_inferrer_factory import GeoInferrerFactory
+from lib.vision_inferrer_factory import VisionInferrerFactory
 from lib.model_taxonomy_dataframe import ModelTaxonomyDataframe
 
 pd.options.mode.copy_on_write = True
@@ -140,7 +140,7 @@ def setup_synonym_taxonomy(self):
         self.taxonomy = synonym_taxonomy
 
     def setup_vision_model(self):
-        self.vision_inferrer = VisionInferrer(
+        self.vision_inferrer = VisionInferrerFactory.create(
            self.config["vision_model_path"]
        )
 
@@ -184,13 +184,18 @@ def setup_geo_model(self):
         if self.geo_elevation_cells is None:
             return
-        self.geo_elevation_model = TFGeoPriorModelElev(self.config["tf_geo_elevation_model_path"])
-        self.geo_model_features = self.geo_elevation_model.features_for_one_class_elevation(
-            latitude=list(self.geo_elevation_cells.lat),
-            longitude=list(self.geo_elevation_cells.lng),
-            elevation=list(self.geo_elevation_cells.elevation)
+        self.geo_elevation_model = GeoInferrerFactory.create(
+            self.config["tf_geo_elevation_model_path"]
         )
+        if hasattr(self.geo_elevation_model, "features_for_one_class_elevation"):
+            self.geo_model_features = self.geo_elevation_model.features_for_one_class_elevation(
+                latitude=list(self.geo_elevation_cells.lat),
+                longitude=list(self.geo_elevation_cells.lng),
+                elevation=list(self.geo_elevation_cells.elevation)
+            )
+
+
     def vision_predict(self, image, debug=False):
         if debug:
             start_time = time.time()
diff --git a/lib/vision_inferrer.py b/lib/vision_inferrer.py
index 04aebcb..42f937b 100644
--- a/lib/vision_inferrer.py
+++ b/lib/vision_inferrer.py
@@ -1,35 +1,36 @@
+from abc import ABC, abstractmethod
+from typing import Optional, TypedDict
+
+import numpy as np
 import tensorflow as tf
 
 
-class VisionInferrer:
-
-    def __init__(self, model_path):
-        self.model_path = model_path
-        self.prepare_tf_model()
-
-    # initialize the TF model given the configured path
-    def prepare_tf_model(self):
-        # disable GPU processing
-        tf.config.set_visible_devices([], "GPU")
-        visible_devices = tf.config.get_visible_devices()
-        for device in visible_devices:
-            assert device.device_type != "GPU"
-
-        full_model = tf.keras.models.load_model(self.model_path, compile=False)
-        self.layered_model = tf.keras.Model(
-            inputs=full_model.inputs,
-            outputs=[
-                full_model.layers[4].output,
-                full_model.layers[2].output
-            ]
-        )
-        self.layered_model.compile()
-
-    # given an image object (usually coming from prepare_image_for_inference),
-    # calculate vision results for the image
-    def process_image(self, image):
-        layer_results = self.layered_model(tf.convert_to_tensor(image), training=False)
-        return {
-            "predictions": layer_results[0][0],
-            "features": layer_results[1][0],
-        }
+class VisionResults(TypedDict):
+    predictions: np.ndarray
+    features: Optional[np.ndarray]
+
+
+class VisionInferrer(ABC):
+    @abstractmethod
+    def __init__(self, model_path: str):
+        """Subclasses must implement this constructor."""
+        pass
+
+    @abstractmethod
+    def prepare_model(self):
+        """
+        Initialize the model.
+
+        Subclasses must implement this method.
+        """
+        pass
+
+    @abstractmethod
+    def process_image(self, image: tf.Tensor) -> VisionResults:
+        """
+        given an image object (usually coming from prepare_image_for_inference),
+        calculate vision results for the image
+
+        Subclasses must implement this method.
+ """ + pass diff --git a/lib/vision_inferrer_coreml.py b/lib/vision_inferrer_coreml.py new file mode 100644 index 0000000..2718eed --- /dev/null +++ b/lib/vision_inferrer_coreml.py @@ -0,0 +1,42 @@ +import coremltools as ct +from PIL import Image +import tensorflow as tf + +from lib.vision_inferrer import VisionInferrer, VisionResults + + +class VisionInferrerCoreML(VisionInferrer): + """Vision Inferrer for the CoreML variant of iNat vision models. + Our implementation expects a single PIL image in the range [0, 255). + """ + + def __init__(self, model_path: str): + self.model_path = model_path + self.prepare_model() + + def prepare_model(self): + """initialize the CoreML model given the configured path""" + self.model = ct.models.MLModel(self.model_path) + spec = self.model.get_spec() + self.input_name = spec.description.input[0].name + + def process_image(self, image_tensor: tf.Tensor) -> VisionResults: + """given an image object (coming from prepare_image_for_inference), + calculate & return vision results for the image.""" + # coreml expects a PIL image so we have to convert from tf + # first we convert from floats [0, 1) to ints [0, 255) + image = tf.image.convert_image_dtype(image_tensor, dtype=tf.uint8) + + # Remove batch dimension if present and convert to NumPy array + image_numpy = image.numpy() + if image_numpy.ndim == 4: + image_numpy = image_numpy[0] + + # Create PIL Image from NumPy array + image_pil = Image.fromarray(image_numpy) + + out_dict = self.model.predict({self.input_name: image_pil}) + preds = out_dict["Identity"][0] + + # don't return features, not relevant for coreml at this point + return {"predictions": preds, "features": None} diff --git a/lib/vision_inferrer_factory.py b/lib/vision_inferrer_factory.py new file mode 100644 index 0000000..ffccc58 --- /dev/null +++ b/lib/vision_inferrer_factory.py @@ -0,0 +1,20 @@ +from sys import platform + +from lib.vision_inferrer import VisionInferrer +from lib.vision_inferrer_coreml import VisionInferrerCoreML +from lib.vision_inferrer_tflite import VisionInferrerTFLite +from lib.vision_inferrer_tf import VisionInferrerTF + + +class VisionInferrerFactory: + @staticmethod + def create(model_path: str) -> VisionInferrer: + if "mlmodel" in model_path: + assert platform == "darwin", "CoreML models can only be used on macOS" + return VisionInferrerCoreML(model_path) + elif "tflite" in model_path: + return VisionInferrerTFLite(model_path) + elif "h5" in model_path: + return VisionInferrerTF(model_path) + else: + raise ValueError(f"Unsupported model format in path: {model_path}") diff --git a/lib/vision_inferrer_tf.py b/lib/vision_inferrer_tf.py new file mode 100644 index 0000000..63ef88b --- /dev/null +++ b/lib/vision_inferrer_tf.py @@ -0,0 +1,37 @@ +import tensorflow as tf + +from lib.vision_inferrer import VisionInferrer, VisionResults + + +class VisionInferrerTF(VisionInferrer): + """Vision Inferrer for the TF variant of iNat vision models. + Our implementation expects inputs in the range [0, 1). 
+ """ + + def __init__(self, model_path: str): + self.model_path = model_path + self.prepare_model() + + def prepare_model(self): + """initialize the TF model given the configured path""" + # disable GPU processing + tf.config.set_visible_devices([], "GPU") + visible_devices = tf.config.get_visible_devices() + for device in visible_devices: + assert device.device_type != "GPU" + + full_model = tf.keras.models.load_model(self.model_path, compile=False) + self.layered_model = tf.keras.Model( + inputs=full_model.inputs, + outputs=[full_model.layers[-1].output, full_model.layers[2].output], + ) + self.layered_model.compile() + + def process_image(self, image: tf.Tensor) -> VisionResults: + """given an image object (coming from prepare_image_for_inference), + calculate & return vision results for the image.""" + layer_results = self.layered_model(tf.convert_to_tensor(image), training=False) + return { + "predictions": layer_results[0][0], + "features": layer_results[1][0], + } diff --git a/lib/vision_inferrer_tflite.py b/lib/vision_inferrer_tflite.py new file mode 100644 index 0000000..a22ce34 --- /dev/null +++ b/lib/vision_inferrer_tflite.py @@ -0,0 +1,41 @@ +import tensorflow as tf + +from lib.vision_inferrer import VisionInferrer, VisionResults + + +class VisionInferrerTFLite(VisionInferrer): + """Vision Inferrer for the tflite variant of iNat vision models. + Our implementation expects inputs in the range [0, 255). + """ + + def __init__(self, model_path: str): + self.model_path = model_path + self.prepare_model() + + def prepare_model(self): + """initialize the tflite model given the configured path""" + self.interpreter = tf.lite.Interpreter(model_path=self.model_path) + self.interpreter.allocate_tensors() + + self.input_details = self.interpreter.get_input_details() + self.output_details = self.interpreter.get_output_details() + + def process_image(self, image_tensor: tf.Tensor) -> VisionResults: + """given an image object (coming from prepare_image_for_inference), + calculate & return vision results for the image.""" + # tflite expects an image in range [0, 255] not [0, 1] + image_tensor = image_tensor * 255 + + # set the input to tflite model + input_dtype = self.input_details[0]["dtype"] + input_data = image_tensor.numpy().astype(input_dtype) + self.interpreter.set_tensor(self.input_details[0]["index"], input_data) + + # execute the tflite model + self.interpreter.invoke() + + # extract the output + output_data = self.interpreter.get_tensor(self.output_details[0]["index"]) + + # don't return features, not relevant for tflite at this point + return {"predictions": output_data[0], "features": None} diff --git a/requirements.txt b/requirements.txt index a66056b..f00f48e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ aiofiles==24.1.0 aiohttp==3.11.2;python_version>="3.11" aiohttp==3.10.11;python_version=="3.8" +coremltools==8.2 flake8==7.0.0 flake8-quotes==3.4.0 Flask[async]==3.0.2 diff --git a/taxon_range_evaluation.py b/taxon_range_evaluation.py index 1d00336..da9ddef 100644 --- a/taxon_range_evaluation.py +++ b/taxon_range_evaluation.py @@ -18,7 +18,7 @@ from sklearn.metrics import auc from sklearn.metrics import precision_recall_curve from lib.model_taxonomy_dataframe import ModelTaxonomyDataframe -from lib.tf_gp_elev_model import TFGeoPriorModelElev +from lib.geo_inferrer_tf import TFGeoPriorModelElev def evaluate_p_r(thres, gdfb, tr_h3, world, plot): diff --git a/tests/conftest.py b/tests/conftest.py index f58935f..4a58158 100644 --- a/tests/conftest.py +++ 
@@ -22,8 +22,8 @@ def taxon(request, taxonomy):
 @pytest.fixture()
 def inatInferrer(request, mocker):
     config = {
-        "vision_model_path": "vision_model_path",
-        "tf_geo_elevation_model_path": "tf_geo_elevation_model_path",
+        "vision_model_path": "vision_model_path.h5",
+        "tf_geo_elevation_model_path": "tf_geo_elevation_model_path.h5",
         "taxonomy_path": os.path.realpath(os.path.dirname(__file__) +
                                           "/fixtures/taxonomy.csv"),
         "elevation_h3_r4":
diff --git a/tests/test_tf_gp_elev_model.py b/tests/test_tf_gp_elev_model.py
index fb56d3b..0821a6b 100644
--- a/tests/test_tf_gp_elev_model.py
+++ b/tests/test_tf_gp_elev_model.py
@@ -1,7 +1,7 @@
 import pytest
 import tensorflow as tf
 from lib.res_layer import ResLayer
-from lib.tf_gp_elev_model import TFGeoPriorModelElev
+from lib.geo_inferrer_tf import TFGeoPriorModelElev
 from unittest.mock import MagicMock
 import unittest.mock as mock
 
diff --git a/tests/test_vision_inferrer.py b/tests/test_vision_inferrer.py
index 5327b33..7693870 100644
--- a/tests/test_vision_inferrer.py
+++ b/tests/test_vision_inferrer.py
@@ -1,6 +1,6 @@
 import tensorflow as tf
 from unittest.mock import MagicMock
-from lib.vision_inferrer import VisionInferrer
+from lib.vision_inferrer_tf import VisionInferrerTF
 
 
 class TestVisionInferrer:
@@ -8,7 +8,7 @@ def test_initialization(self, mocker):
         mocker.patch("tensorflow.keras.models.load_model", return_value=MagicMock())
         mocker.patch("tensorflow.keras.Model", return_value=MagicMock())
         model_path = "model_path"
-        inferrer = VisionInferrer(model_path)
+        inferrer = VisionInferrerTF(model_path)
         assert inferrer.model_path == model_path
         tf.keras.models.load_model.assert_called_once_with(
             model_path,
@@ -19,7 +19,7 @@ def test_process_image(self, mocker):
         mocker.patch("tensorflow.keras.models.load_model", return_value=MagicMock())
         mocker.patch("tensorflow.keras.Model", return_value=MagicMock())
         model_path = "model_path"
-        inferrer = VisionInferrer(model_path)
+        inferrer = VisionInferrerTF(model_path)
         theimage = "theimage"
         inferrer.process_image(theimage)
         inferrer.layered_model.assert_called_once_with(
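
For reference, a minimal usage sketch of the new factory entry points, under stated assumptions: the paths "geo_model.h5" and "vision_model.h5" and the `image` tensor below are hypothetical placeholders; in the service the real paths come from the config keys "vision_model_path" and "tf_geo_elevation_model_path", and images come from prepare_image_for_inference. The factories dispatch on the model file name — ".h5" selects the TensorFlow backends, ".tflite" the TF Lite backends, and ".mlmodel" the CoreML backends (macOS only) — and all backends share the predict / process_image interfaces defined by GeoInferrer and VisionInferrer.

from lib.geo_inferrer_factory import GeoInferrerFactory
from lib.vision_inferrer_factory import VisionInferrerFactory

# "geo_model.h5" is a hypothetical path; a ".h5" name dispatches to TFGeoPriorModelElev,
# ".tflite" to TFLiteGeoPriorModelElev, and ".mlmodel" to CoremlGeoPriorModelElev (macOS only).
geo_model = GeoInferrerFactory.create("geo_model.h5")
# one call per location; returns the geo model's score vector for that point
geo_scores = geo_model.predict(latitude=37.77, longitude=-122.42, elevation=52.0)

# "vision_model.h5" is likewise a hypothetical path
vision_model = VisionInferrerFactory.create("vision_model.h5")
# `image` should be a preprocessed tf.Tensor (see prepare_image_for_inference);
# the TF backend expects values in [0, 1), while the TFLite and CoreML backends
# convert to the [0, 255) range internally.
# results = vision_model.process_image(image)
# results["predictions"] is the prediction vector; results["features"] is None
# for the TFLite and CoreML backends.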