diff --git a/.github/workflows/ctest.yml b/.github/workflows/ctest.yml index 63c33f0..287e182 100644 --- a/.github/workflows/ctest.yml +++ b/.github/workflows/ctest.yml @@ -31,6 +31,7 @@ jobs: uses: threeal/cmake-action@v1.3.0 with: run-build: true + options: DALOTIA_WITH_TENSORFLOW=ON build-args: --config ${{ matrix.build_type }} # cxx-compiler: ${{ matrix.compiler }} build-dir: build-${{ matrix.build_type }} @@ -40,3 +41,5 @@ jobs: with: test-dir: build-${{ matrix.build_type }} args: --output-on-failure -T memcheck + - name: Output Memcheck Log + run: for i in /home/runner/work/dalotia/dalotia/build-*/Testing/Temporary/MemoryChecker.*.log ; do echo $i ; cat $i ; done diff --git a/.github/workflows/spack.yml b/.github/workflows/spack.yml index 971350a..0bbf722 100644 --- a/.github/workflows/spack.yml +++ b/.github/workflows/spack.yml @@ -26,5 +26,12 @@ jobs: run: | spack spec dalotia spack info dalotia - spack dev-build dalotia@main - \ No newline at end of file + spack dev-build --test=root dalotia@main + spack load --sh dalotia@main + - name: info and install dalotia with tensorflow + shell: spack-bash {0} + run: | + spack spec dalotia~safetensorscpp+tensorflow + spack info dalotia~safetensorscpp+tensorflow + spack dev-build --test=root dalotia@main ~safetensorscpp+tensorflow + spack load --sh dalotia@main+tensorflow diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d1c016..2801691 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,12 +1,13 @@ cmake_minimum_required(VERSION 3.24) -project(DALOTIA CXX C) +project(dalotia CXX C) option(DALOTIA_CPP_BUILD_EXAMPLES "Build examples" ON) option(DALOTIA_BUILD_TESTS "Build tests" ON) option(DALOTIA_WITH_CPP_PMR "use polymorphic memory resources (pmr) C++17 feature for dalotia" ON) option(DALOTIA_WITH_OPENMP "Build with OpenMP support" OFF) option(DALOTIA_WITH_SAFETENSORS_CPP "use safetensors-cpp for tensor I/O" ON) +option(DALOTIA_WITH_TENSORFLOW "use the Tensorflow C backend for tensor I/O" OFF) option(DALOTIA_WITH_FORTRAN "Build Fortran interface" ON) if (DALOTIA_WITH_FORTRAN) enable_language(Fortran) @@ -19,6 +20,10 @@ if (DALOTIA_WITH_FORTRAN) endif (DALOTIA_WITH_FORTRAN) set(CMAKE_CXX_STANDARD 17) +set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_SOURCE_DIR}/cmake/modules") + +include(GNUInstallDirs) +set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}) # if this is empty, will be set at config time # (build with --config Release or --config Debug etc.) @@ -28,14 +33,14 @@ message(STATUS "CMAKE BUILD TYPE: ${CMAKE_BUILD_TYPE}") file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/) # safetensors-cpp +set(DALOTIA_EXTERNAL_SAFETENSORS_CPP OFF) if (DALOTIA_WITH_SAFETENSORS_CPP) - if (NOT DEFINED safetensors-cpp_DIR OR safetensors-cpp_DIR MATCHES "fetch") + if (NOT DEFINED safetensors-cpp_DIR OR safetensors-cpp_DIR MATCHES "fetch" AND (NOT DEFINED safetensors-cpp_FOUND OR NOT safetensors-cpp_FOUND)) include(FetchContent) FetchContent_Declare( safetensors-cpp GIT_REPOSITORY https://github.com/syoyo/safetensors-cpp.git GIT_TAG a88953c981c6773760540592fa97f04619a8f825 - OVERRIDE_FIND_PACKAGE ) set(SAFETENSORS_CPP_CXX_EXCEPTIONS true) #TODO maybe depend on language? set(SAFETENSORS_CPP_BUILD_EXAMPLES false) @@ -47,20 +52,56 @@ if (DALOTIA_WITH_SAFETENSORS_CPP) target_include_directories(safetensors_cpp PUBLIC $ ) - # TODO this is not good, every package should install its own targets - # cf. https://discourse.cmake.org/t/propagation-of-fetchcontent-targets-when-installing/2559/2 - install(TARGETS safetensors_cpp EXPORT dalotia - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - RUNTIME DESTINATION bin - INCLUDES DESTINATION include - ) else() # to pass safetensors-cpp_DIR on the command line: find_package(safetensors-cpp REQUIRED CMAKE_FIND_ROOT_PATH_BOTH) + set(DALOTIA_EXTERNAL_SAFETENSORS_CPP ON) endif() endif (DALOTIA_WITH_SAFETENSORS_CPP) +# tensorflow +if (DALOTIA_WITH_TENSORFLOW) + if (NOT DEFINED tensorflow_DIR OR tensorflow_DIR MATCHES "fetch") + include(FetchContent) + FetchContent_Declare( + tensorflow + URL https://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-gpu-linux-x86_64.tar.gz + ) + FetchContent_MakeAvailable(tensorflow) + + FetchContent_GetProperties(tensorflow SOURCE_DIR tensorflow_SRC_DIR) + set(tensorflow_INCLUDE_DIRS ${tensorflow_SRC_DIR}/include) + find_library(tensorflow_LIBRARIES + NAMES tensorflow + PATHS ${tensorflow_SRC_DIR}/lib + NO_DEFAULT_PATH + ) + message(STATUS "tensorflow_LIBRARIES: ${tensorflow_LIBRARIES}") + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(tensorflow DEFAULT_MSG tensorflow_INCLUDE_DIRS tensorflow_LIBRARIES) + else() + find_package(tensorflow REQUIRED CMAKE_FIND_ROOT_PATH_BOTH) + endif() + if(NOT TARGET tensorflow::tensorflow) + add_library(tensorflow::tensorflow SHARED IMPORTED) + set_target_properties( + tensorflow::tensorflow + PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${tensorflow_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${tensorflow_INCLUDE_DIRS}" + )# $ $ + # cf. https://stackoverflow.com/a/41179630/7272382 + install(FILES ${tensorflow_INCLUDE_DIRS}/tensorflow/c/c_api.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/c + ) + install(FILES ${tensorflow_LIBRARIES} + DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + endif() +endif (DALOTIA_WITH_TENSORFLOW) + add_subdirectory(src) # target dalotia_cpp is generated here if (DALOTIA_WITH_OPENMP) find_package(OpenMP) @@ -80,25 +121,38 @@ if (DALOTIA_CPP_BUILD_EXAMPLES) endif (DALOTIA_CPP_BUILD_EXAMPLES) if (DALOTIA_BUILD_TESTS) - list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") enable_testing() add_subdirectory(test) endif (DALOTIA_BUILD_TESTS) -# install version info etc. + +# install dependencies, version info, etc. include(CMakePackageConfigHelpers) +configure_package_config_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${PROJECT_NAME}-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake + INSTALL_DESTINATION ${INSTALL_CONFIGDIR} + PATH_VARS DALOTIA_EXTERNAL_SAFETENSORS_CPP +) +# # Install find modules +# install(DIRECTORY cmake/modules/ DESTINATION ${INSTALL_CONFIGDIR}/modules) + +install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake + DESTINATION ${INSTALL_CONFIGDIR} +) write_basic_package_version_file( "dalotia-config-version.cmake" VERSION 0.0.1 COMPATIBILITY SameMajorVersion ) install(FILES $/dalotia-config-version.cmake - DESTINATION lib/cmake/dalotia + DESTINATION ${INSTALL_CONFIGDIR} ) # export full library interface -install(EXPORT dalotia - FILE dalotia-config.cmake +install(EXPORT dalotia_export_set + FILE ${PROJECT_NAME}-targets.cmake NAMESPACE dalotia:: - DESTINATION lib/cmake/dalotia + DESTINATION ${INSTALL_CONFIGDIR} ) diff --git a/cmake/dalotia-config.cmake.in b/cmake/dalotia-config.cmake.in new file mode 100644 index 0000000..beef9d7 --- /dev/null +++ b/cmake/dalotia-config.cmake.in @@ -0,0 +1,12 @@ +get_filename_component(dalotia_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) + +if(NOT TARGET dalotia::dalotia) + if(@DALOTIA_EXTERNAL_SAFETENSORS_CPP@) + # if we wanted to provide a FindSafetensors-cpp.cmake file: + set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${dalotia_CMAKE_DIR}/modules") + find_package(safetensors-cpp REQUIRED) + else() + include("${dalotia_CMAKE_DIR}/../safetensors-cpp-config.cmake") + endif() + include("${dalotia_CMAKE_DIR}/dalotia-targets.cmake") +endif() diff --git a/cmake/modules/Findtensorflow.cmake b/cmake/modules/Findtensorflow.cmake new file mode 100644 index 0000000..4aa23e9 --- /dev/null +++ b/cmake/modules/Findtensorflow.cmake @@ -0,0 +1,48 @@ +# file shamelessly taken from https://github.com/serizba/cppflow/blob/master/cmake/modules/Findtensorflow.cmake +# so here goes their license: + +# MIT License +# +# Copyright (c) 2019 Sergio Izquierdo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# installation: https://izquierdo.dev/cppflow/installation.html + +find_path(tensorflow_INCLUDE_DIRS + NAMES tensorflow/c/c_api.h +) +mark_as_advanced(tensorflow_INCLUDE_DIRS) + +find_library(tensorflow_LIBRARIES + NAMES tensorflow +) +mark_as_advanced(tensorflow_LIBRARIES) + + +if(NOT tensorflow_INCLUDE_DIRS) + message(STATUS "Could NOT find tensorflow/c/c_api.h") +endif() +if(NOT tensorflow_LIBRARIES) + message(STATUS "Could NOT find tensorflow library") +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(tensorflow DEFAULT_MSG tensorflow_INCLUDE_DIRS tensorflow_LIBRARIES) + diff --git a/data/generate_tf.py b/data/generate_tf.py new file mode 100755 index 0000000..fa8819f --- /dev/null +++ b/data/generate_tf.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +import tf_keras as keras # use keras version 2 by installing tf-keras +from tf_keras import layers, models, initializers + + +def residual_block(inputs, filters, kernel_size=3, stride=1): + x = layers.Conv2D( + filters, + kernel_size, + strides=stride, + padding="same", + kernel_initializer=initializers.Constant(0.1), + )(inputs) + x = layers.BatchNormalization()(x) + x = layers.ReLU()(x) + + x = layers.Conv2D( + filters, + kernel_size, + strides=1, + padding="same", + kernel_initializer=initializers.Constant(0.2), + )(x) + x = layers.BatchNormalization()(x) + + # Shortcut connection + if stride != 1 or inputs.shape[-1] != filters: + shortcut = layers.Conv2D( + filters, + 1, + strides=stride, + padding="same", + kernel_initializer=initializers.Constant(0.3), + )(inputs) + shortcut = layers.BatchNormalization()(shortcut) + else: + shortcut = inputs + + x = layers.Add()([x, shortcut]) + return layers.ReLU()(x) + + +def build_resnet(input_shape=(16, 16, 3), num_classes=10): + inputs = keras.Input(shape=input_shape) + x = layers.Conv2D( + 16, 3, padding="same", kernel_initializer=initializers.Constant(0.4) + )(inputs) + x = layers.BatchNormalization()(x) + x = layers.ReLU()(x) + + x = residual_block(x, 16) + + x = layers.GlobalAveragePooling2D()(x) + outputs = layers.Dense( + num_classes, activation="softmax", kernel_initializer=initializers.GlorotNormal(seed=133) + )(x) + + return models.Model(inputs, outputs) + + +if __name__ == "__main__": + model = build_resnet() + model.save("tensorflow_model", save_format="tf") + + for layer in model.layers: + print(f"{layer.name}:") + for weight in layer.weights: + print(f" {weight.name} = {weight.numpy()}") \ No newline at end of file diff --git a/data/tensorflow_model/keras_metadata.pb b/data/tensorflow_model/keras_metadata.pb new file mode 100644 index 0000000..ef2f891 --- /dev/null +++ b/data/tensorflow_model/keras_metadata.pb @@ -0,0 +1,19 @@ + +nroot"_tf_keras_network*m{"name": "model", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Functional", "config": {"name": "model", "trainable": true, "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 16, 16, 3]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.4}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv2d", "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization", "inbound_nodes": [[["conv2d", 0, 0, {}]]]}, {"class_name": "ReLU", "config": {"name": "re_lu", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "name": "re_lu", "inbound_nodes": [[["batch_normalization", 0, 0, {}]]]}, {"class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.1}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv2d_1", "inbound_nodes": [[["re_lu", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_1", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_1", "inbound_nodes": [[["conv2d_1", 0, 0, {}]]]}, {"class_name": "ReLU", "config": {"name": "re_lu_1", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "name": "re_lu_1", "inbound_nodes": [[["batch_normalization_1", 0, 0, {}]]]}, {"class_name": "Conv2D", "config": {"name": "conv2d_2", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.2}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv2d_2", "inbound_nodes": [[["re_lu_1", 0, 0, {}]]]}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_2", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}}, "gamma_initializer": {"class_name": "Ones", "config": {}}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}}, "moving_variance_initializer": {"class_name": "Ones", "config": {}}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_2", "inbound_nodes": [[["conv2d_2", 0, 0, {}]]]}, {"class_name": "Add", "config": {"name": "add", "trainable": true, "dtype": "float32"}, "name": "add", "inbound_nodes": [[["batch_normalization_2", 0, 0, {}], ["re_lu", 0, 0, {}]]]}, {"class_name": "ReLU", "config": {"name": "re_lu_2", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "name": "re_lu_2", "inbound_nodes": [[["add", 0, 0, {}]]]}, {"class_name": "GlobalAveragePooling2D", "config": {"name": "global_average_pooling2d", "trainable": true, "dtype": "float32", "data_format": "channels_last", "keepdims": false}, "name": "global_average_pooling2d", "inbound_nodes": [[["re_lu_2", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 10, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotNormal", "config": {"seed": 133}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["global_average_pooling2d", 0, 0, {}]]]}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}, "shared_object_id": 33, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 16, 16, 3]}, "ndim": 4, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 3]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": {"class_name": "__tuple__", "items": [{"class_name": "TensorShape", "items": [null, 16, 16, 3]}, "float32", "input_1"]}}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": {"class_name": "__tuple__", "items": [{"class_name": "TensorShape", "items": [null, 16, 16, 3]}, "float32", "input_1"]}}, "keras_version": "2.19.0", "backend": "tensorflow", "model_config": {"class_name": "Functional", "config": {"name": "model", "trainable": true, "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 16, 16, 3]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": [], "shared_object_id": 0}, {"class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.4}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv2d", "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 4}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 5}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 7}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization", "inbound_nodes": [[["conv2d", 0, 0, {}]]], "shared_object_id": 8}, {"class_name": "ReLU", "config": {"name": "re_lu", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "name": "re_lu", "inbound_nodes": [[["batch_normalization", 0, 0, {}]]], "shared_object_id": 9}, {"class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.1}, "shared_object_id": 10}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 11}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv2d_1", "inbound_nodes": [[["re_lu", 0, 0, {}]]], "shared_object_id": 12}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_1", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 13}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 14}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 15}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 16}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_1", "inbound_nodes": [[["conv2d_1", 0, 0, {}]]], "shared_object_id": 17}, {"class_name": "ReLU", "config": {"name": "re_lu_1", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "name": "re_lu_1", "inbound_nodes": [[["batch_normalization_1", 0, 0, {}]]], "shared_object_id": 18}, {"class_name": "Conv2D", "config": {"name": "conv2d_2", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.2}, "shared_object_id": 19}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 20}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "conv2d_2", "inbound_nodes": [[["re_lu_1", 0, 0, {}]]], "shared_object_id": 21}, {"class_name": "BatchNormalization", "config": {"name": "batch_normalization_2", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 22}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 23}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 24}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 25}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "name": "batch_normalization_2", "inbound_nodes": [[["conv2d_2", 0, 0, {}]]], "shared_object_id": 26}, {"class_name": "Add", "config": {"name": "add", "trainable": true, "dtype": "float32"}, "name": "add", "inbound_nodes": [[["batch_normalization_2", 0, 0, {}], ["re_lu", 0, 0, {}]]], "shared_object_id": 27}, {"class_name": "ReLU", "config": {"name": "re_lu_2", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "name": "re_lu_2", "inbound_nodes": [[["add", 0, 0, {}]]], "shared_object_id": 28}, {"class_name": "GlobalAveragePooling2D", "config": {"name": "global_average_pooling2d", "trainable": true, "dtype": "float32", "data_format": "channels_last", "keepdims": false}, "name": "global_average_pooling2d", "inbound_nodes": [[["re_lu_2", 0, 0, {}]]], "shared_object_id": 29}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 10, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotNormal", "config": {"seed": 133}, "shared_object_id": 30}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 31}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["global_average_pooling2d", 0, 0, {}]]], "shared_object_id": 32}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense", 0, 0]]}}}2 + root.layer-0"_tf_keras_input_layer*{"class_name": "InputLayer", "name": "input_1", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 16, 16, 3]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 16, 16, 3]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}2 + +root.layer_with_weights-0"_tf_keras_layer* {"name": "conv2d", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Conv2D", "config": {"name": "conv2d", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.4}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 4, "axes": {"-1": 3}}, "shared_object_id": 35}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 3]}}2 + root.layer_with_weights-1"_tf_keras_layer* {"name": "batch_normalization", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "BatchNormalization", "config": {"name": "batch_normalization", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 4}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 5}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 7}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["conv2d", 0, 0, {}]]], "shared_object_id": 8, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 4, "max_ndim": null, "min_ndim": null, "axes": {"3": 16}}, "shared_object_id": 36}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 + root.layer-3"_tf_keras_layer*{"name": "re_lu", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "ReLU", "config": {"name": "re_lu", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "inbound_nodes": [[["batch_normalization", 0, 0, {}]]], "shared_object_id": 9, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 + +root.layer_with_weights-2"_tf_keras_layer* {"name": "conv2d_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Conv2D", "config": {"name": "conv2d_1", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.1}, "shared_object_id": 10}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 11}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["re_lu", 0, 0, {}]]], "shared_object_id": 12, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 4, "axes": {"-1": 16}}, "shared_object_id": 37}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 + root.layer_with_weights-3"_tf_keras_layer* {"name": "batch_normalization_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "BatchNormalization", "config": {"name": "batch_normalization_1", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 13}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 14}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 15}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 16}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["conv2d_1", 0, 0, {}]]], "shared_object_id": 17, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 4, "max_ndim": null, "min_ndim": null, "axes": {"3": 16}}, "shared_object_id": 38}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 + root.layer-6"_tf_keras_layer*{"name": "re_lu_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "ReLU", "config": {"name": "re_lu_1", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "inbound_nodes": [[["batch_normalization_1", 0, 0, {}]]], "shared_object_id": 18, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 + +root.layer_with_weights-4"_tf_keras_layer* {"name": "conv2d_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Conv2D", "config": {"name": "conv2d_2", "trainable": true, "dtype": "float32", "filters": 16, "kernel_size": {"class_name": "__tuple__", "items": [3, 3]}, "strides": {"class_name": "__tuple__", "items": [1, 1]}, "padding": "same", "data_format": "channels_last", "dilation_rate": {"class_name": "__tuple__", "items": [1, 1]}, "groups": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Constant", "config": {"value": 0.2}, "shared_object_id": 19}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 20}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["re_lu_1", 0, 0, {}]]], "shared_object_id": 21, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 4, "axes": {"-1": 16}}, "shared_object_id": 39}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 +  root.layer_with_weights-5"_tf_keras_layer* {"name": "batch_normalization_2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "BatchNormalization", "config": {"name": "batch_normalization_2", "trainable": true, "dtype": "float32", "axis": [3], "momentum": 0.99, "epsilon": 0.001, "center": true, "scale": true, "beta_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 22}, "gamma_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 23}, "moving_mean_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 24}, "moving_variance_initializer": {"class_name": "Ones", "config": {}, "shared_object_id": 25}, "beta_regularizer": null, "gamma_regularizer": null, "beta_constraint": null, "gamma_constraint": null}, "inbound_nodes": [[["conv2d_2", 0, 0, {}]]], "shared_object_id": 26, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 4, "max_ndim": null, "min_ndim": null, "axes": {"3": 16}}, "shared_object_id": 40}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 + + root.layer-9"_tf_keras_layer*{"name": "add", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Add", "config": {"name": "add", "trainable": true, "dtype": "float32"}, "inbound_nodes": [[["batch_normalization_2", 0, 0, {}], ["re_lu", 0, 0, {}]]], "shared_object_id": 27, "build_input_shape": [{"class_name": "TensorShape", "items": [null, 16, 16, 16]}, {"class_name": "TensorShape", "items": [null, 16, 16, 16]}]}2 +  root.layer-10"_tf_keras_layer*{"name": "re_lu_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "ReLU", "config": {"name": "re_lu_2", "trainable": true, "dtype": "float32", "max_value": null, "negative_slope": 0.0, "threshold": 0.0}, "inbound_nodes": [[["add", 0, 0, {}]]], "shared_object_id": 28, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 +  root.layer-11"_tf_keras_layer*{"name": "global_average_pooling2d", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GlobalAveragePooling2D", "config": {"name": "global_average_pooling2d", "trainable": true, "dtype": "float32", "data_format": "channels_last", "keepdims": false}, "inbound_nodes": [[["re_lu_2", 0, 0, {}]]], "shared_object_id": 29, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 4, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 41}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16, 16, 16]}}2 + root.layer_with_weights-6"_tf_keras_layer*{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 10, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotNormal", "config": {"seed": 133}, "shared_object_id": 30}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 31}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["global_average_pooling2d", 0, 0, {}]]], "shared_object_id": 32, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 16}}, "shared_object_id": 42}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16]}}2 \ No newline at end of file diff --git a/data/tensorflow_model/saved_model.pb b/data/tensorflow_model/saved_model.pb new file mode 100644 index 0000000..b82c6d7 Binary files /dev/null and b/data/tensorflow_model/saved_model.pb differ diff --git a/data/tensorflow_model/variables/variables.data-00000-of-00001 b/data/tensorflow_model/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000..df01897 Binary files /dev/null and b/data/tensorflow_model/variables/variables.data-00000-of-00001 differ diff --git a/data/tensorflow_model/variables/variables.index b/data/tensorflow_model/variables/variables.index new file mode 100644 index 0000000..d339b05 Binary files /dev/null and b/data/tensorflow_model/variables/variables.index differ diff --git a/spack_repo_dalotia/packages/dalotia/package.py b/spack_repo_dalotia/packages/dalotia/package.py index 5cd634b..725982a 100644 --- a/spack_repo_dalotia/packages/dalotia/package.py +++ b/spack_repo_dalotia/packages/dalotia/package.py @@ -20,11 +20,11 @@ class Dalotia(CMakePackage): version("main", branch="main") version("1.0.0", tag="v1.0.0") - variant("tests", default=True, description="build dalotia tests") variant("cpp_pmr", default=True, description="use polymorphic memory resources (pmr) C++17 feature for dalotia") variant("openmp", default=True, description="Build with OpenMP support") variant("safetensorscpp", default=True, description="use safetensors-cpp for tensor I/O") variant("fortran", default=True, description="Build Fortran interface") + variant("tensorflow", default=False, description="Build with TensorFlow support") depends_on("cxx", type="build") depends_on("c", type="build") @@ -36,10 +36,10 @@ class Dalotia(CMakePackage): def cmake_args(self): args = [ self.define("DALOTIA_CPP_BUILD_EXAMPLES", True), - self.define_from_variant("DALOTIA_BUILD_TESTS", "tests"), self.define_from_variant("DALOTIA_WITH_CPP_PMR", "cpp_pmr"), self.define_from_variant("DALOTIA_WITH_OPENMP", "openmp"), self.define_from_variant("DALOTIA_WITH_SAFETENSORS_CPP", "safetensorscpp"), + self.define_from_variant("DALOTIA_WITH_TENSORFLOW", "tensorflow"), self.define_from_variant("DALOTIA_WITH_FORTRAN", "fortran"), ] if self.spec.satisfies("+safetensorscpp"): diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 881b341..07e3eab 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,7 +1,7 @@ add_library(dalotia_cpp dalotia.cpp) # Daniel Pfeifer says: no variables target_sources(dalotia_cpp PRIVATE dalotia_assignment.cpp dalotia_formats.cpp ) set_target_properties(dalotia_cpp PROPERTIES PUBLIC_HEADER - "dalotia.h;dalotia_formats.h;dalotia.hpp;dalotia_formats.hpp;dalotia_assignment.hpp;dalotia_tensor_file.hpp;dalotia_safetensors_file.hpp") + "dalotia.h;dalotia_formats.h;dalotia.hpp;dalotia_formats.hpp;dalotia_assignment.hpp;dalotia_tensor_file.hpp;dalotia_safetensors_file.hpp;dalotia_tensorflow_file.hpp") # have one dalotia library target that can be used in C++ and Fortran add_library(dalotia INTERFACE) add_library(dalotia::dalotia_cpp ALIAS dalotia_cpp) @@ -26,6 +26,12 @@ if (DALOTIA_WITH_SAFETENSORS_CPP) target_sources(dalotia_cpp PRIVATE dalotia_safetensors_file.cpp ) endif (DALOTIA_WITH_SAFETENSORS_CPP) +if (DALOTIA_WITH_TENSORFLOW) + target_link_libraries(dalotia_cpp PUBLIC tensorflow::tensorflow) + target_compile_options(dalotia_cpp PUBLIC "-DDALOTIA_WITH_TENSORFLOW") + target_sources(dalotia_cpp PRIVATE dalotia_tensorflow_file.cpp ) +endif (DALOTIA_WITH_TENSORFLOW) + # not sure if this is elegant, but helps to make this compatible to all languages target_sources(dalotia_cpp PRIVATE dalotia.hpp dalotia.h) target_include_directories(dalotia_cpp PUBLIC @@ -40,14 +46,14 @@ else() target_compile_options(dalotia_cpp PRIVATE -Wall -Wextra) endif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") -install(TARGETS dalotia_cpp EXPORT dalotia +install(TARGETS dalotia_cpp EXPORT dalotia_export_set LIBRARY DESTINATION lib INCLUDES DESTINATION include PUBLIC_HEADER DESTINATION include ) if (DALOTIA_WITH_FORTRAN) - install(TARGETS dalotia_fortran EXPORT dalotia + install(TARGETS dalotia_fortran EXPORT dalotia_export_set LIBRARY DESTINATION lib INCLUDES DESTINATION include PUBLIC_HEADER DESTINATION include diff --git a/src/dalotia.cpp b/src/dalotia.cpp index 2e6ae02..3f7946a 100644 --- a/src/dalotia.cpp +++ b/src/dalotia.cpp @@ -3,9 +3,20 @@ #include namespace dalotia { using file_exists = std::filesystem::exists; -} -#else // __cpp_lib_filesystem +using is_directory = std::filesystem::is_directory; +} // namespace dalotia +#else // __cpp_lib_filesystem namespace dalotia { +#include + +bool is_directory(const char *path) { + struct stat st; + if (stat(path, &st) == 0) { + return S_ISDIR(st.st_mode); + } + return false; +} + bool file_exists(const std::string &name) { if (FILE *file = fopen(name.c_str(), "r")) { fclose(file); @@ -43,6 +54,12 @@ TensorFile *make_tensor_file(const std::string &filename) { #else // DALOTIA_WITH_SAFETENSORS_CPP throw std::runtime_error("Safetensors support not enabled"); #endif // DALOTIA_WITH_SAFETENSORS_CPP + } else if (extension == "keras" || extension == "pb" || is_directory(filename.c_str())) { +#ifdef DALOTIA_WITH_TENSORFLOW + return new TensorflowSavedModel(filename); +#else // DALOTIA_WITH_TENSORFLOW + throw std::runtime_error("Tensorflow support not enabled"); +#endif // DALOTIA_WITH_TENSORFLOW } else { throw std::runtime_error("Unsupported file extension: ." + extension); } diff --git a/src/dalotia.hpp b/src/dalotia.hpp index 999a801..8b96cc7 100644 --- a/src/dalotia.hpp +++ b/src/dalotia.hpp @@ -17,6 +17,9 @@ #ifdef DALOTIA_WITH_SAFETENSORS_CPP #include "dalotia_safetensors_file.hpp" #endif +#ifdef DALOTIA_WITH_TENSORFLOW +#include "dalotia_tensorflow_file.hpp" +#endif namespace dalotia { // factory function for the file, selected by file extension and diff --git a/src/dalotia_assignment.cpp b/src/dalotia_assignment.cpp index 3986f9c..d648c37 100644 --- a/src/dalotia_assignment.cpp +++ b/src/dalotia_assignment.cpp @@ -200,8 +200,8 @@ void assign_linearly(dalotia_byte *__restrict__ dest, */ template std::pair, size_t> get_new_strides_permuted( - const size_t *const input_shape, const int *permutation) { - auto desired_shape = std::vector(num_dimensions); + const int *const input_shape, const int *permutation) { + auto desired_shape = std::vector(num_dimensions); size_t total_size = 1; for (size_t i = 0; i < num_dimensions; ++i) { desired_shape[i] = input_shape[permutation[i]]; @@ -227,7 +227,7 @@ std::pair, size_t> get_new_strides_permuted( template <> void assign_permuted<1>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, [[maybe_unused]] const int *permutation) { @@ -239,12 +239,12 @@ void assign_permuted<1>(dalotia_byte *__restrict__ dest, template <> void assign_permuted<2>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation) { constexpr int num_dimensions = 2; - auto desired_shape = std::vector(num_dimensions); + auto desired_shape = std::vector(num_dimensions); [[maybe_unused]] size_t total_size = 1; for (size_t i = 0; i < num_dimensions; ++i) { desired_shape[i] = input_shape[permutation[i]]; @@ -259,8 +259,8 @@ void assign_permuted<2>(dalotia_byte *__restrict__ dest, auto assign_function = get_assignment_function(weight_output_format, weight_input_format); size_t load_index = 0; - for (size_t i = 0; i < input_shape[1]; ++i) { - for (size_t j = 0; j < input_shape[0]; ++j) { + for (int i = 0; i < input_shape[1]; ++i) { + for (int j = 0; j < input_shape[0]; ++j) { auto store_index = j * input_shape[1] + i; auto input_pointer = tensor_start + load_index * load_item_bytes; auto output_pointer = dest + store_index * store_item_bytes; @@ -275,7 +275,7 @@ void assign_permuted<2>(dalotia_byte *__restrict__ dest, template <> void assign_permuted<3>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation) { @@ -291,9 +291,9 @@ void assign_permuted<3>(dalotia_byte *__restrict__ dest, get_assignment_function(weight_output_format, weight_input_format); auto input_pointer = tensor_start; size_t store_index = 0; - for (size_t i = 0; i < input_shape[0]; ++i) { - for (size_t j = 0; j < input_shape[1]; ++j) { - for (size_t k = 0; k < input_shape[2]; ++k) { + for (int i = 0; i < input_shape[0]; ++i) { + for (int j = 0; j < input_shape[1]; ++j) { + for (int k = 0; k < input_shape[2]; ++k) { assert(static_cast(store_index) == std::inner_product(new_strides_permuted.begin(), new_strides_permuted.end(), @@ -319,7 +319,7 @@ void assign_permuted<3>(dalotia_byte *__restrict__ dest, template <> void assign_permuted<4>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation) { @@ -335,10 +335,10 @@ void assign_permuted<4>(dalotia_byte *__restrict__ dest, get_assignment_function(weight_output_format, weight_input_format); auto input_pointer = tensor_start; size_t store_index = 0; - for (size_t i = 0; i < input_shape[0]; ++i) { - for (size_t j = 0; j < input_shape[1]; ++j) { - for (size_t k = 0; k < input_shape[2]; ++k) { - for (size_t l = 0; l < input_shape[3]; ++l) { + for (int i = 0; i < input_shape[0]; ++i) { + for (int j = 0; j < input_shape[1]; ++j) { + for (int k = 0; k < input_shape[2]; ++k) { + for (int l = 0; l < input_shape[3]; ++l) { assert(static_cast(store_index) == std::inner_product(new_strides_permuted.begin(), new_strides_permuted.end(), @@ -368,7 +368,7 @@ void assign_permuted<4>(dalotia_byte *__restrict__ dest, template <> void assign_permuted<5>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation) { @@ -384,11 +384,11 @@ void assign_permuted<5>(dalotia_byte *__restrict__ dest, get_assignment_function(weight_output_format, weight_input_format); auto input_pointer = tensor_start; size_t store_index = 0; - for (size_t i = 0; i < input_shape[0]; ++i) { - for (size_t j = 0; j < input_shape[1]; ++j) { - for (size_t k = 0; k < input_shape[2]; ++k) { - for (size_t l = 0; l < input_shape[3]; ++l) { - for (size_t m = 0; m < input_shape[4]; ++m) { + for (int i = 0; i < input_shape[0]; ++i) { + for (int j = 0; j < input_shape[1]; ++j) { + for (int k = 0; k < input_shape[2]; ++k) { + for (int l = 0; l < input_shape[3]; ++l) { + for (int m = 0; m < input_shape[4]; ++m) { assert(static_cast(store_index) == std::inner_product(new_strides_permuted.begin(), new_strides_permuted.end(), diff --git a/src/dalotia_assignment.hpp b/src/dalotia_assignment.hpp index b3ae548..8f30eef 100644 --- a/src/dalotia_assignment.hpp +++ b/src/dalotia_assignment.hpp @@ -63,7 +63,7 @@ void assign_linearly(dalotia_byte *__restrict__ dest, template void assign_permuted(dalotia_byte *__restrict__ /*dest*/, dalotia_WeightFormat /*weight_output_format*/, - const size_t *const /*input_shape*/, + const int *const /*input_shape*/, const dalotia_byte *__restrict__ /*tensor_start*/, dalotia_WeightFormat /*weight_input_format*/, const int * /*permutation*/) { @@ -75,7 +75,7 @@ void assign_permuted(dalotia_byte *__restrict__ /*dest*/, template <> void assign_permuted<1>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation); @@ -84,7 +84,7 @@ void assign_permuted<1>(dalotia_byte *__restrict__ dest, template <> void assign_permuted<2>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation); @@ -93,7 +93,7 @@ void assign_permuted<2>(dalotia_byte *__restrict__ dest, template <> void assign_permuted<3>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation); @@ -102,7 +102,7 @@ void assign_permuted<3>(dalotia_byte *__restrict__ dest, template <> void assign_permuted<4>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation); @@ -111,7 +111,7 @@ void assign_permuted<4>(dalotia_byte *__restrict__ dest, template <> void assign_permuted<5>(dalotia_byte *__restrict__ dest, dalotia_WeightFormat weight_output_format, - const size_t *const input_shape, + const int *const input_shape, const dalotia_byte *__restrict__ tensor_start, dalotia_WeightFormat weight_input_format, const int *permutation); diff --git a/src/dalotia_safetensors_file.cpp b/src/dalotia_safetensors_file.cpp index 93898e3..e7dc267 100644 --- a/src/dalotia_safetensors_file.cpp +++ b/src/dalotia_safetensors_file.cpp @@ -128,8 +128,10 @@ void SafetensorsFile::load_tensor_dense(const std::string &tensor_name, reinterpret_cast(databuffer) + safetensor.data_offsets[0]; if (!final_permutation_in_c_order.empty()) { + std::vector input_shape( + safetensor.shape.begin(), safetensor.shape.end()); assign_permuted(num_dimensions, tensor, weightFormat, - safetensor.shape.data(), tensor_start, + input_shape.data(), tensor_start, input_weight_format, final_permutation_in_c_order.data()); } else { diff --git a/src/dalotia_safetensors_file.hpp b/src/dalotia_safetensors_file.hpp index 9a9a099..b8fd0d6 100644 --- a/src/dalotia_safetensors_file.hpp +++ b/src/dalotia_safetensors_file.hpp @@ -1,10 +1,12 @@ #pragma once #include +#include #include +#include #include "dalotia_formats.hpp" -#include "safetensors.hh" #include "dalotia_tensor_file.hpp" +#include "safetensors.hh" namespace dalotia { @@ -29,7 +31,7 @@ class SafetensorsFile : public TensorFile { public: explicit SafetensorsFile(const std::string &filename); - ~SafetensorsFile(); + ~SafetensorsFile() override; const std::vector &get_tensor_names() const override; diff --git a/src/dalotia_tensor_file.hpp b/src/dalotia_tensor_file.hpp index 31b522b..feb4e8f 100644 --- a/src/dalotia_tensor_file.hpp +++ b/src/dalotia_tensor_file.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "dalotia_formats.hpp" @@ -116,6 +117,11 @@ class TensorFile { if constexpr (std::is_same_v) { tensor.resize(total_size * sizeof_weight_format(weight_format)); } else { + if (dalotia::sizeof_weight_format(weight_format) != + sizeof(value_type)) { + throw std::runtime_error( + "load_tensor_dense: weight format size does not match value type size"); + } tensor.resize(total_size); } this->load_tensor_dense(tensor_name, weight_format, ordering, @@ -182,4 +188,21 @@ class TensorFile { // FILE *file_ = nullptr; }; +// helper function to output iterables +template +inline std::string to_string(const Iterable &iterable) { + std::string result; + for (const auto &item : iterable) { + if (!result.empty()) { + result += ", "; + } + if constexpr (std::is_same_v, std::string>) { + result += item; // for strings, just append + } else { + result += std::to_string(item); // for other types, convert to string + } + } + return result; +} + } // namespace dalotia diff --git a/src/dalotia_tensorflow_file.cpp b/src/dalotia_tensorflow_file.cpp new file mode 100644 index 0000000..3aaa58d --- /dev/null +++ b/src/dalotia_tensorflow_file.cpp @@ -0,0 +1,237 @@ +#include "dalotia_tensorflow_file.hpp" + +#include +#include + +#include "dalotia_assignment.hpp" +#include "dalotia_formats.hpp" + +namespace dalotia { + +TF_Output get_operation_from_name(const std::string &tensor_name, + std::shared_ptr graph) { + TF_Operation *oper = TF_GraphOperationByName(graph.get(), tensor_name.c_str()); + return {oper, 0}; +} + +// parts of this code are intensely based on cppflow, esp. tf_status_check and the +// constructor -- so here goes their license for the respective parts: + +// MIT License +// +// Copyright (c) 2019 Sergio Izquierdo +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +inline bool tf_status_check(std::shared_ptr status) { + // cf. https://github.com/serizba/cppflow/blob/master/include/cppflow/context.h#L45 + if (TF_GetCode(status.get()) != TF_OK) { + throw std::runtime_error(TF_Message(status.get())); + } + return true; +} + +int tf_get_num_dimensions(TF_Output output, std::shared_ptr graph, + std::shared_ptr status) { + // TF_DataType dtype = TF_OperationOutputType(output); + int num_dimensions = TF_GraphGetTensorNumDims(graph.get(), output, status.get()); + tf_status_check(status); + return num_dimensions; +} + +TensorflowSavedModel::TensorflowSavedModel(const std::string &filename) + : TensorFile(filename) { + // cf. + // https://github.com/serizba/cppflow/blob/master/include/cppflow/model.h + this->status_ = {TF_NewStatus(), &TF_DeleteStatus}; + this->graph_ = {TF_NewGraph(), TF_DeleteGraph}; + + // Create the session. + std::unique_ptr + session_options = {TF_NewSessionOptions(), TF_DeleteSessionOptions}; + + auto session_deleter = [this](TF_Session *sess) { + TF_DeleteSession(sess, this->status_.get()); + tf_status_check(this->status_); + }; + + std::unique_ptr run_options = { + TF_NewBufferFromString("", 0), TF_DeleteBuffer}; + std::unique_ptr meta_graph = {TF_NewBuffer(), + TF_DeleteBuffer}; + + int tag_len = 1; + const char *tag = "serve"; + this->session_ = {TF_LoadSessionFromSavedModel(session_options.get(), + run_options.get(), filename.c_str(), + &tag, tag_len, this->graph_.get(), + meta_graph.get(), this->status_.get()), + session_deleter}; + tf_status_check(this->status_); + + { // create and fill the tensor names vector + size_t pos = 0; + TF_Operation *oper; + while ((oper = TF_GraphNextOperation(graph_.get(), &pos)) != nullptr) { + const char *op_name = TF_OperationName(oper); + tensor_names_.emplace_back(op_name); + } + } +} + +TensorflowSavedModel::~TensorflowSavedModel() = default; + +const std::vector &TensorflowSavedModel::get_tensor_names() const { + return tensor_names_; +} + +bool TensorflowSavedModel::is_sparse(const std::string & /*tensor_name*/) const { + return false; +} + +size_t TensorflowSavedModel::get_num_dimensions(const std::string &tensor_name) const { + TF_Output output = get_operation_from_name(tensor_name, this->graph_); + if (output.oper == nullptr) { + throw std::runtime_error( + "Tensor not found: " + tensor_name + + ". Tensor names in the file: " + to_string(tensor_names_)); + } + if (tensor_name == "NoOp") { + // NoOp is a special operation in TensorFlow, it has no dimensions + // (weird vector error otherwise) + return 0; + } + int num_dimensions = tf_get_num_dimensions(output, this->graph_, this->status_); + if (num_dimensions < 0) { + throw std::runtime_error("Failed to get number of dimensions for tensor: " + + tensor_name); + } + return num_dimensions; +} + +std::vector +TensorflowSavedModel::get_tensor_extents(const std::string &tensor_name, + const std::vector &permutation) const { + TF_Output output = get_operation_from_name(tensor_name, this->graph_); + if (output.oper == nullptr) { + throw std::runtime_error( + "Tensor not found: " + tensor_name + + ". Tensor names in the file: " + to_string(tensor_names_)); + } + + int num_dimensions = tf_get_num_dimensions(output, this->graph_, this->status_); + std::vector extents_read(num_dimensions); + TF_GraphGetTensorShape(this->graph_.get(), output, extents_read.data(), + extents_read.size(), this->status_.get()); + tf_status_check(this->status_); + + std::vector extents(extents_read.size()); + + if (!permutation.empty()) { + auto final_permutation_in_c_order = + final_c_permutation_from_permutation_and_order( + permutation, dalotia_Ordering::dalotia_C_ordering, extents.size()); + if (!final_permutation_in_c_order.empty()) { + for (size_t i = 0; i < extents.size(); i++) { + extents[i] = extents_read[final_permutation_in_c_order[i]]; + } + } + } else { + extents.assign(extents_read.begin(), extents_read.end()); + } + return extents; +} + +void TensorflowSavedModel::load_tensor_dense(const std::string &tensor_name, + dalotia_WeightFormat weightFormat, + dalotia_Ordering ordering, + dalotia_byte *__restrict__ tensor, + const std::vector &permutation) { + const TF_Tensor *tf_tensor = this->get_tensor_pointer_from_name(tensor_name); + void *databuffer = TF_TensorData(tf_tensor); + int num_dimensions = TF_NumDims(tf_tensor); + const int64_t num_tensor_elements = TF_TensorElementCount(tf_tensor); + + TF_DataType tf_type = TF_TensorType(tf_tensor); + const dalotia_WeightFormat input_weight_format = tensorflow_type_map.at(tf_type); +#ifndef NDEBUG + assert(databuffer != nullptr); + assert(tf_tensor != nullptr); + assert(num_dimensions == static_cast(this->get_num_dimensions(tensor_name))); + assert(num_dimensions >= 0); + assert(num_tensor_elements == + static_cast(this->get_num_tensor_elements(tensor_name))); + size_t num_bytes = TF_TensorByteSize(tf_tensor); + assert(num_bytes == + static_cast(dalotia::sizeof_weight_format(input_weight_format)) * + num_tensor_elements); +#endif // NDEBUG + + auto *tensor_start = reinterpret_cast(databuffer); + + auto final_permutation_in_c_order = final_c_permutation_from_permutation_and_order( + permutation, ordering, num_dimensions); + if (!final_permutation_in_c_order.empty()) { + std::vector input_shape = this->get_tensor_extents(tensor_name); + dalotia::assign_permuted(num_dimensions, tensor, weightFormat, input_shape.data(), + tensor_start, input_weight_format, + final_permutation_in_c_order.data()); + } else { + dalotia::assign_linearly(tensor, weightFormat, num_tensor_elements, tensor_start, + input_weight_format); + } +} + +std::vector +TensorflowSavedModel::get_tensor_pointers(const std::string &tensor_name) { + const TF_Tensor *tf_tensor = this->get_tensor_pointer_from_name(tensor_name); + return std::vector( + 1, reinterpret_cast(TF_TensorData(tf_tensor))); +} + +const TF_Tensor * +TensorflowSavedModel::get_tensor_pointer_from_name(const std::string &tensor_name) { + // check if it is already in the cache + auto it = tensors_.find(tensor_name); + if (it != tensors_.end()) { + return it->second.get(); + } else { + // if not, load it from the graph + TF_Output output = get_operation_from_name(tensor_name, this->graph_); + if (output.oper == nullptr) { + throw std::runtime_error( + "Tensor not found: " + tensor_name + + ". Tensor names in the file: " + to_string(tensor_names_)); + } + + TF_Tensor *tf_tensor = nullptr; + TF_SessionRun(this->session_.get(), nullptr, nullptr, nullptr, 0, &output, + &tf_tensor, 1, nullptr, 0, nullptr, this->status_.get()); + if (tf_tensor == nullptr) { + throw std::runtime_error("Failed to load tensor: " + tensor_name); + } + tf_status_check(this->status_); + auto [position, inserted] = this->tensors_.emplace( + tensor_name, std::unique_ptr( + tf_tensor, &TF_DeleteTensor)); + assert(inserted); // should not already exist + return position->second.get(); + } +} +} // namespace dalotia diff --git a/src/dalotia_tensorflow_file.hpp b/src/dalotia_tensorflow_file.hpp new file mode 100644 index 0000000..5ed8920 --- /dev/null +++ b/src/dalotia_tensorflow_file.hpp @@ -0,0 +1,65 @@ +#pragma once +#include + +#include +#include + +#include "dalotia_formats.hpp" +#include "dalotia_tensor_file.hpp" + +namespace dalotia { +// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/tf_datatype.h +static const std::map tensorflow_type_map{ + {TF_DOUBLE, dalotia_WeightFormat::dalotia_float_64}, + {TF_FLOAT, dalotia_WeightFormat::dalotia_float_32}, + {TF_HALF, dalotia_WeightFormat::dalotia_float_16}, + {TF_BFLOAT16, dalotia_WeightFormat::dalotia_bfloat_16}, + // {TF_BOOL, dalotia_WeightFormat::dalotia_bool}, + {TF_INT8, dalotia_WeightFormat::dalotia_int_8}, + {TF_UINT8, dalotia_WeightFormat::dalotia_uint_8}, + {TF_INT16, dalotia_WeightFormat::dalotia_int_16}, + {TF_UINT16, dalotia_WeightFormat::dalotia_uint_16}, + {TF_INT32, dalotia_WeightFormat::dalotia_int_32}, + {TF_UINT32, dalotia_WeightFormat::dalotia_uint_32}, + // {TF_INT64, dalotia_WeightFormat::dalotia_int_64}, + // {TF_UINT64, dalotia_WeightFormat::dalotia_uint_64}, + // {TF_FLOAT8_E5M2, dalotia_WeightFormat::dalotia_float_8_e5m2}, + // {TF_INT2, dalotia_WeightFormat::dalotia_int_2}, +}; + +class TensorflowSavedModel : public TensorFile { + public: + explicit TensorflowSavedModel(const std::string &filename); + + ~TensorflowSavedModel() override; + + const std::vector &get_tensor_names() const override; + + bool is_sparse(const std::string &tensor_name) const override; + + size_t get_num_dimensions(const std::string &tensor_name) const override; + + std::vector + get_tensor_extents(const std::string &tensor_name = "", + const std::vector &permutation = {}) const override; + + void load_tensor_dense(const std::string &tensor_name, + dalotia_WeightFormat weightFormat, dalotia_Ordering ordering, + dalotia_byte *__restrict__ tensor, + const std::vector &permutation = {}) override; + + std::vector get_tensor_pointers(const std::string &tensor_name); + + // cf. https://github.com/serizba/cppflow/blob/master/include/cppflow/model.h + std::shared_ptr status_; + std::shared_ptr graph_; + std::shared_ptr session_; + std::vector tensor_names_; + std::map> + tensors_; // cache for loaded tensor pointers + + private: + const TF_Tensor *get_tensor_pointer_from_name(const std::string &tensor_name); +}; + +} // namespace dalotia \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9ed462b..2156828 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,3 +1,9 @@ +list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure") + +# cf. https://stackoverflow.com/questions/52730994/how-to-pass-arguments-to-memcheck-with-ctest +set(MEMORYCHECK_COMMAND_OPTIONS + "--gen-suppressions=all --suppressions=${CMAKE_CURRENT_SOURCE_DIR}/tensorflow.supp --leak-check=full" +) include (CTest) if(DALOTIA_WITH_SAFETENSORS_CPP) @@ -24,3 +30,10 @@ if(DALOTIA_WITH_SAFETENSORS_CPP) add_test( mnist_load_fortran test_mnist_fortran ) endif (DALOTIA_WITH_FORTRAN) endif (DALOTIA_WITH_SAFETENSORS_CPP) + +if (DALOTIA_WITH_TENSORFLOW) + add_executable( test_tensorflow test_tensorflow.cpp ) + target_link_libraries( test_tensorflow dalotia_cpp tensorflow::tensorflow ) + target_include_directories( test_tensorflow PUBLIC ${tensorflow_INCLUDE_DIRS}) + add_test( tensorflow-file test_tensorflow ) +endif (DALOTIA_WITH_TENSORFLOW) diff --git a/test/tensorflow.supp b/test/tensorflow.supp new file mode 100644 index 0000000..551e3cf --- /dev/null +++ b/test/tensorflow.supp @@ -0,0 +1,183 @@ +{ + value8_tensorflow_and_deps + Memcheck:Value8 + ... + fun:_ZN10tensorflow10checkpoint25CheckpointCallbackManager32GetCheckpointIdAndPathFromPrefixB5cxx11ESt17basic_string_viewIcSt11char_traitsIcEE + ... +} +{ + cond_isOnePass_tensorflow_and_deps + Memcheck:Cond + ... + fun:_ZN10tensorflow10checkpoint25CheckpointCallbackManager32GetCheckpointIdAndPathFromPrefixB5cxx11ESt17basic_string_viewIcSt11char_traitsIcEE + ... +} +# now, possible mem leaks, first the ones that don't reference tf in frame trace at all: +{ + possible_leak_tensorflow_sub_I_host + Memcheck:Leak + ... + fun:_GLOBAL__sub_I_host_platform.cc + ... + fun:_dl_init + ... +} +{ + possible_leak_tensorflow_sub_I_cuda_platform + Memcheck:Leak + match-leak-kinds: possible + fun:_Znwm + ... + fun:_GLOBAL__sub_I_cuda_platform.cc + ... + fun:_dl_init + ... +} +{ + possible_leak_tensorflow_cutlass_gemm + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_GLOBAL__sub_I_cutlass_gemm_fusion.cc + ... + fun:_dl_init + ... +} +# then, tf calls that originate in init and appear in everything linked with tf +{ + possible_leak_tensorflow_register_op + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow11register_op* + ... +} +{ + possible_leak_tensorflow_new_op_definition_builder + Memcheck:Leak + match-leak-kinds: possible + ... + fun:TF_NewOpDefinitionBuilder + ... + fun:_dl_init + ... +} +{ + possible_leak_tensorflow_register_kernel_builder + Memcheck:Leak + match-leak-kinds: possible + ... + fun:TF_RegisterKernelBuilderWithKernelDef + ... + fun:_dl_init + ... +} +{ + possible_leak_tensorflow_kernel_registrar + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow14kernel_factory17OpKernelRegistrarC2EPKNS_9KernelDefESt17basic_string_viewIcSt11char_traitsIcEEPFPNS_8OpKernelEPNS_20OpKernelConstructionEE + ... + fun:_dl_init + ... +} +{ + possible_leak_tensorflow_dataset_experiment_registrar + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow4data26DatasetExperimentRegistrarC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt8functionIFbmEESA_IFblbEE + ... + fun:_dl_init + ... +} +# and then the actual ones that show up when running tensorflow +{ + possible_leak_tensorflow_shape_refiner + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow12ShapeRefinerC1EiPKNS_19OpRegistryInterfaceE + ... +} +{ + possible_leak_tensorflow_graph_registry + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZNK10tensorflow10OpRegistry10LookUpSlowERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE + ... +} +{ + possible_leak_tensorflow_saved_model_internal + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow22LoadSavedModelInternalERKNS_14SessionOptionsERKNS_10RunOptionsERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt13unordered_setISB_St4hashISB_ESt8equal_toISB_ESaISB_EEPNS_16SavedModelBundleE + ... +} +{ + possible_leak_tensorflow_absl_hash_set_policy + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZNK4absl12lts_2023080218container_internal12raw_hash_setINS1_17FlatHashSetPolicyINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEENS1_10StringHashENS1_8StringEqESaIS9_EE19EmplaceDecomposableclIPKcJRKSI_EEESt4pairINSE_8iteratorEbERKT_DpOT0_ + ... +} +{ + possible_leak_tensorflow_absl_container_internal + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN4absl12lts_2023080218container_internal* + ... +} +{ + possible_leak_tensorflow_absl_flags_internal + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN4absl12lts_2023080214flags_internal23RegisterCommandLineFlagERNS0_15CommandLineFlagEPKc + ... +} +{ + possible_leak_tensorflow_proto_parse_from_string + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow20ProtoParseFromStringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS_9AttrValueE + ... +} +{ + possible_leak_tensorflow_base_gpu_device_factory + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow20BaseGPUDeviceFactory13CreateDevicesERKNS_14SessionOptionsERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPSt6vectorISt10unique_ptrINS_6DeviceESt14default_deleteISE_EESaISH_EE + ... +} +{ + possible_leak_tensorflow_build_graph + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow19GraphExecutionState10BuildGraphERKNS_17BuildGraphOptionsEPSt10unique_ptrINS_11ClientGraphESt14default_deleteIS5_EE + ... +} +{ + possible_leak_tensorflow_local_Device + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow11LocalDevice19EigenThreadPoolInfoC2ERKNS_14SessionOptionsEiPN3tsl9AllocatorE + ... +} +{ + possible_leak_tensorflow_kernel_def + Memcheck:Leak + match-leak-kinds: possible + ... + fun:_ZN10tensorflow24KernelDef_AttrConstraint9MergeImplERN6google8protobuf7MessageERKS3_ + ... +} diff --git a/test/test_helper.h b/test/test_helper.h new file mode 100644 index 0000000..527ef2c --- /dev/null +++ b/test/test_helper.h @@ -0,0 +1,17 @@ +#pragma once +#include +#include + +void assert_close(volatile float a, volatile float b) { + if (fabsf(a - b) > 1e-4) { + fprintf(stderr, "assert_close: expected %f but got %f\n", b, a); + assert(false); + } +} + +void assert_equal(volatile float a, volatile float b) { + if (a != b) { + fprintf(stderr, "assert_equal: expected %f but got %f\n", b, a); + assert(false); + } +} \ No newline at end of file diff --git a/test/test_load.c b/test/test_load.c index 5604395..89781a9 100644 --- a/test/test_load.c +++ b/test/test_load.c @@ -1,10 +1,10 @@ #include -#include #include #include #include #include "dalotia.h" +#include "test_helper.h" void test_get_tensor_names(const char* filename) { DalotiaTensorFile* dalotia_file = dalotia_open_file(filename); @@ -31,13 +31,6 @@ void test_get_tensor_names(const char* filename) { dalotia_close_file(dalotia_file); } -void assert_close(volatile float a, volatile float b) { - if (fabsf(a - b) > 1e-4) { - fprintf(stderr, "assert_close: expected %f but got %f\n", b, a); - assert(false); - } -} - void test_load(const char* filename, const char* tensor_name) { DalotiaTensorFile* dalotia_file = dalotia_open_file(filename); { diff --git a/test/test_tensorflow.cpp b/test/test_tensorflow.cpp new file mode 100644 index 0000000..2e23594 --- /dev/null +++ b/test/test_tensorflow.cpp @@ -0,0 +1,95 @@ +#include +#include + +#include "dalotia.h" +#include "dalotia.hpp" +#include "dalotia_tensorflow_file.hpp" +#include "test_helper.h" + +void test_names() { + std::string filename = "../data/tensorflow_model"; + constexpr dalotia_WeightFormat weightFormat = dalotia_WeightFormat::dalotia_float_64; + dalotia_Ordering ordering = dalotia_Ordering::dalotia_C_ordering; + + // test the TensorflowSavedModel class + std::unique_ptr dalotia_file( + dalotia::make_tensor_file(filename)); + if (dalotia_file == nullptr) { + throw std::runtime_error("Failed to open TensorFlow model file: " + filename); + } + auto tensor_names = dalotia_file->get_tensor_names(); + assert(!tensor_names.empty()); + std::cout << "Tensor names in the file: " << std::endl; + for (const auto &name : tensor_names) { + std::cout << " - " << name << std::endl; + } + + for (const auto &name : tensor_names) { + // for all tensor names, check if they are sparse and get their number of + // dimensions + bool is_sparse = dalotia_file->is_sparse(name); + assert(!is_sparse); + size_t num_dimensions = dalotia_file->get_num_dimensions(name); + if (num_dimensions < 0) { + throw std::runtime_error("Tensor " + name + " has " + + std::to_string(num_dimensions) + + " dimensions, which is unexpected."); + } + if (num_dimensions > 0) { + // test get_tensor_extents + auto extents = dalotia_file->get_tensor_extents(name); + // test load_tensor_dense + if (*std::min_element(extents.begin(), extents.end()) > 0) { + std::unique_ptr tensor( + new dalotia_byte[dalotia::sizeof_weight_format() * + dalotia_file->get_num_tensor_elements(name)]); + dalotia_file->load_tensor_dense(name, weightFormat, ordering, + tensor.get()); + } + } + } +#ifdef DALOTIA_WITH_CPP_PMR + { + std::string tensor_name = "dense/kernel/Read/ReadVariableOp"; + auto [extents, tensor_cpp_double] = dalotia::load_tensor_dense( + filename, tensor_name, weightFormat, ordering); + assert(!extents.empty()); + assert(!tensor_cpp_double.empty()); + auto [extents_float, tensor_cpp_float] = dalotia::load_tensor_dense( + filename, tensor_name, dalotia_WeightFormat::dalotia_float_32, ordering); + for (size_t i = 0; i < extents.size(); ++i) { + assert(extents[i] == extents_float[i]); + } + for (size_t i = 0; i < tensor_cpp_double.size(); ++i) { + assert(tensor_cpp_float[i] == static_cast(tensor_cpp_double[i])); + } + std::vector true_values_begin = { + -0.25138268, -0.25613192, 0.16491315, -0.13381714, 0.35687172, -0.35824186, + 0.3529436, -0.55490106, 0.27651784, 0.30784482, -0.2846631}; + for (size_t i = 0; i < true_values_begin.size(); ++i) { + assert_close(tensor_cpp_double[i], true_values_begin[i]); + } + assert_close(tensor_cpp_double.back(), -0.21346514); + + // check if this is also what is in the original buffer + if (auto dalotia_tensorflow_file = + dynamic_cast(dalotia_file.get())) { + const dalotia_byte *tensor_pointer = + dalotia_tensorflow_file->get_tensor_pointers(tensor_name)[0]; + const float *tensor_float_pointer = + reinterpret_cast(tensor_pointer); + for (size_t i = 0; i < tensor_cpp_float.size(); ++i) { + assert_equal(tensor_cpp_float[i], tensor_float_pointer[i]); + } + } else { + throw std::runtime_error("dalotia_file is not a TensorflowSavedModel"); + } + } +#endif // DALOTIA_WITH_CPP_PMR +} + +int main(int, char **) { + test_names(); + std::cout << "test_tensorflow succeded" << std::endl; + return 0; +} \ No newline at end of file