Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## Version 3.0.0 - Python versions supported update - Breaking change - 2025-02
- ⚠️ Dropped Python 3.6 support
- ⚠️ Base model weights need to be re-downloaded to get weights in Keras 3 format.
- Supports Python 3.9, 3.10, 3.11, 3.12 and 3.13

## Version 2.0.3 - Update regarding partial supersession - 2022-08
- ⚠️ The plugin is now partially superseded by native capabilities

Expand Down
3 changes: 2 additions & 1 deletion code-env/python/desc.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"acceptedPythonInterpreters": ["PYTHON36"],
"acceptedPythonInterpreters": ["PYTHON39","PYTHON310","PYTHON311","PYTHON312","PYTHON313"],
"forceConda": false,
"installCorePackages": true,
"corePackagesSet": "AUTO",
"installJupyterSupport": false
}
14 changes: 7 additions & 7 deletions code-env/python/spec/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
scikit-learn==0.23.1
tensorflow==2.2.2
h5py>=2.10.0
Pillow==8.1.1
tables==3.6.1
flask>=1.0,<1.1
GPUtil==1.4.0
scikit-learn<1.8
tensorflow<2.21
h5py<3.16
Pillow<12.2
tables<3.11
flask<3.2
GPUtil==1.4.0
4 changes: 2 additions & 2 deletions plugin.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"id": "deeplearning-image",
"version": "2.0.3",
"version": "3.0.0",
"meta": {
"label": "Deep learning on images",
"category": "Computer Vision",
"description": "Use a pre-trained model to score images or extract features. You can retrain generic models to specialize them on a particular set of images. \n⚠️ This plugin is now partially superseded by the native [image classification capabilities](https://doc.dataiku.com/dss/latest/machine-learning/computer-vision/index.html).",
"author": "Dataiku (Henri Chabert)",
"author": "Dataiku",
"icon": "icon-picture",
"licenseInfo": "Apache Software License",
"url": "https://www.dataiku.com/dss/plugins/info/deeplearning-image.html",
Expand Down
10 changes: 5 additions & 5 deletions python-lib/dku_deeplearning_image/api_designer/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
scikit-learn==0.23.1
tensorflow==2.2.2
h5py>=2.10.0
Pillow==8.1.1
tables==3.6.1
scikit-learn<1.8
tensorflow<2.21
h5py<3.16
Pillow<12.2
tables<3.11
GPUtil==1.4.0
22 changes: 15 additions & 7 deletions python-lib/dku_deeplearning_image/tensorboard_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from argparse import ArgumentParser
from tensorboard.plugins import base_plugin
from dataikuapi.utils import DataikuException
from tensorboard.backend.event_processing import data_ingester

from dku_deeplearning_image.dku_constants import TENSORBOARD_LOGS

Expand Down Expand Up @@ -63,10 +64,13 @@ def make_plugin_loader(plugin_spec):
Returns:
A TBLoader for the given plugin.
"""
if issubclass(plugin_spec, base_plugin.TBLoader):
return plugin_spec()
if issubclass(plugin_spec, base_plugin.TBPlugin):
return base_plugin.BasicLoader(plugin_spec)
if isinstance(plugin_spec, base_plugin.TBLoader):
return plugin_spec
if isinstance(plugin_spec, type):
if issubclass(plugin_spec, base_plugin.TBLoader):
return plugin_spec()
if issubclass(plugin_spec, base_plugin.TBPlugin):
return base_plugin.BasicLoader(plugin_spec)
raise TypeError(f"Not a TBLoader or TBPlugin subclass: {plugin_spec}")


Expand All @@ -84,10 +88,14 @@ def __get_tb_app(tensorboard_logs):
flags.purge_orphaned_data = True
flags.reload_interval = 5.0
flags.logdir = tensorboard_logs
return application.standard_tensorboard_wsgi(
plugin_loaders=plugins,
ingester = data_ingester.LocalDataIngester(flags)
ingester.start()
return application.TensorBoardWSGIApp(
flags=flags,
plugins=plugins,
data_provider=ingester.data_provider,
assets_zip_provider=__get_custom_assets_zip_provider(),
flags=flags
deprecated_multiplexer=ingester.deprecated_multiplexer
)


Expand Down
13 changes: 10 additions & 3 deletions python-lib/dku_deeplearning_image/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,10 @@ def set_gpu_options(should_use_gpu, gpu_list, gpu_memory_allocation_mode, memory


def get_tf_strategy():
return tf.distribute.MirroredStrategy()
gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 1:
return tf.distribute.MirroredStrategy()
return tf.distribute.get_strategy()


def calculate_gpu_memory_allocation(memory_limit_ratio, gpu_to_use):
Expand All @@ -116,7 +119,9 @@ def calculate_gpu_memory_allocation(memory_limit_ratio, gpu_to_use):


def get_weights_filename(with_top=False):
return '{}{}.h5'.format(constants.WEIGHT_FILENAME, '' if with_top else constants.NOTOP_SUFFIX)
suffix = '' if with_top else constants.NOTOP_SUFFIX
ext = '.weights.h5'
return f'{constants.WEIGHT_FILENAME}{suffix}{ext}'


def get_file_path(folder_path, file_name):
Expand Down Expand Up @@ -199,10 +204,12 @@ def format_predictions_output(predictions, errors, classify=False, labels_df=Non

def apply_preprocess_image(tfds, input_shape, preprocessing, is_b64=False):
def _apply_preprocess_image(image_path):
return tf.numpy_function(
result = tf.numpy_function(
func=lambda x: tf.cast(preprocess_img(x, input_shape, preprocessing, is_b64), tf.float32),
inp=[image_path],
Tout=tf.float32)
result.set_shape(input_shape)
return result

def _convert_errors(images):
return tf.numpy_function(
Expand Down
24 changes: 20 additions & 4 deletions python-runnables/create-api-service-v2/runnable.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
"name": "code_env_name",
"label": "Environment name",
"type": "CODE_ENV",
"description":"Must be a PYTHON3 env containing the required packages: scikit-learn==0.23.1, tensorflow==2.2.2, h5py>=2.10.0, Pillow==8.1.1, tables==3.6.1, GPUtil==1.4.0",
"description":"Must be a Python 3.9-3.13 env containing the required packages: scikit-learn, tensorflow, h5py, Pillow, tables, GPUtil",

"visibilityCondition" : "model.code_env_options == 'existing'",
"mandatory": true
Expand All @@ -87,15 +87,31 @@
"label": "Python interpreter",
"selectChoices": [
{
"value": "PYTHON36",
"label": "Python 3.6 (from PATH)"
"value": "PYTHON39",
"label": "Python 3.9 (from PATH)"
},
{
"value": "PYTHON310",
"label": "Python 3.10 (from PATH)"
},
{
"value": "PYTHON311",
"label": "Python 3.11 (from PATH)"
},
{
"value": "PYTHON312",
"label": "Python 3.12 (from PATH)"
},
{
"value": "PYTHON313",
"label": "Python 3.13 (from PATH)"
},
{
"value": "CUSTOM",
"label": "Custom (lookup in PATH)"
}
],
"defaultValue": "PYTHON36",
"defaultValue": "PYTHON39",
"visibilityCondition" : "model.code_env_options == 'new'",
"mandatory": true
},
Expand Down
77 changes: 33 additions & 44 deletions python-runnables/download-models-v2/runnable.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from dataiku.runnables import Runnable
import dataiku
import requests
import json
import os
import tempfile
import time
import requests
import pandas as pd
import dku_deeplearning_image.dku_constants as constants
import dku_deeplearning_image.utils as utils
from dku_deeplearning_image.misc_objects import DkuModel
from dku_deeplearning_image.misc_objects import DkuFileManager
import time

# We deactivate GPU for this script, because all the methods only need to
# fetch information about model and do not make computation
Expand Down Expand Up @@ -61,56 +64,42 @@ def run(self, progress_callback):

new_model.set_config(config)

# Downloading weights
url_to_weights = new_model.get_weights_url()

def update_percent(percent, last_update_time):
new_time = time.time()
if (new_time - last_update_time) > 3:
progress_callback(percent)
return new_time
else:
return last_update_time

def download_files_to_managed_folder(output_f, files_info, chunk_size=8192):
total_size = 0
bytes_so_far = 0
for file_info in files_info:
response = requests.get(file_info["url"], stream=True)
total_size += int(response.headers.get('content-length'))
file_info["response"] = response
update_time = time.time()
for file_info in files_info:
with output_f.get_writer(file_info["filename"]) as f:
for content in file_info["response"].iter_content(chunk_size=chunk_size):
bytes_so_far += len(content)
# Only scale to 80% because needs to compute model summary after download
percent = int(float(bytes_so_far) / total_size * 80)
update_time = update_percent(percent, update_time)
f.write(content)

class_mapping_url = constants.IMAGENET_URL if trained_on == constants.IMAGENET else ""

files_to_dl = [
{"url": url_to_weights["top"], "filename": new_model.get_weights_path(with_top=True)},
{"url": url_to_weights["no_top"], "filename": new_model.get_weights_path(with_top=False)}
]

if class_mapping_url:
files_to_dl.append({"url": class_mapping_url, "filename": constants.CLASSES_MAPPING_FILE})

output_folder_dss.put_file(constants.CONFIG_FILE, json.dumps(config))
download_files_to_managed_folder(output_folder, files_to_dl)

if class_mapping_url:
mapping_df = pd.read_json(output_folder.get_download_stream(constants.CLASSES_MAPPING_FILE), orient="index")
# Keras 3: Load with weights='imagenet' and save_weights() to convert to Keras 3 format.
# This handles the internal h5 structure differences between legacy Google files and Keras 3.
with tempfile.TemporaryDirectory() as tmpdir:
progress_callback(10)

model_top = new_model.application.model_func(weights='imagenet', include_top=True)
weights_top_path = os.path.join(tmpdir, utils.get_weights_filename(with_top=True))
model_top.save_weights(weights_top_path)

progress_callback(40)

model_notop = new_model.application.model_func(weights='imagenet', include_top=False)
weights_notop_path = os.path.join(tmpdir, utils.get_weights_filename(with_top=False))
model_notop.save_weights(weights_notop_path)

progress_callback(70)

with open(weights_top_path, 'rb') as f:
output_folder.upload_stream(utils.get_weights_filename(with_top=True), f)
with open(weights_notop_path, 'rb') as f:
output_folder.upload_stream(utils.get_weights_filename(with_top=False), f)


progress_callback(80)

if trained_on == constants.IMAGENET:
response = requests.get(constants.IMAGENET_URL)
mapping_df = pd.read_json(response.text, orient="index")
mapping_df = mapping_df.reset_index()
mapping_df = mapping_df.rename(columns={"index": "id", 1: "className"})[["id", "className"]]
DkuFileManager.write_to_folder(
folder=output_folder,
file_path=constants.MODEL_LABELS_FILE,
content=mapping_df.to_csv(index=False, sep=","))
output_folder_dss.delete_file(constants.CLASSES_MAPPING_FILE)

new_model.load_model({}, constants.GOAL.SCORE)
new_model.save_info(output_folder)
Expand Down