From 7e7389b5e3858f9eec9b397de2862c414e326636 Mon Sep 17 00:00:00 2001 From: Sonali Saha Date: Fri, 29 Oct 2021 17:06:16 +0530 Subject: [PATCH 1/3] Add TensorFlow examples - ResNet50 and BERT models Signed-off-by: Sonali Saha --- tensorflow/BERT/.gitignore | 3 + tensorflow/BERT/Makefile | 60 ++++++++++ tensorflow/BERT/python.manifest.template | 68 +++++++++++ tensorflow/README.md | 114 +++++++++++++++++++ tensorflow/ResNet50/.gitignore | 2 + tensorflow/ResNet50/Makefile | 49 ++++++++ tensorflow/ResNet50/python.manifest.template | 72 ++++++++++++ 7 files changed, 368 insertions(+) create mode 100644 tensorflow/BERT/.gitignore create mode 100755 tensorflow/BERT/Makefile create mode 100755 tensorflow/BERT/python.manifest.template create mode 100755 tensorflow/README.md create mode 100644 tensorflow/ResNet50/.gitignore create mode 100755 tensorflow/ResNet50/Makefile create mode 100755 tensorflow/ResNet50/python.manifest.template diff --git a/tensorflow/BERT/.gitignore b/tensorflow/BERT/.gitignore new file mode 100644 index 0000000..ca0b8b8 --- /dev/null +++ b/tensorflow/BERT/.gitignore @@ -0,0 +1,3 @@ +/models/ +/data/ +/output/ diff --git a/tensorflow/BERT/Makefile b/tensorflow/BERT/Makefile new file mode 100755 index 0000000..ea887ef --- /dev/null +++ b/tensorflow/BERT/Makefile @@ -0,0 +1,60 @@ +# BERT sample for Tensorflow + +ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine) +SGX_SIGNER_KEY ?= ../../../Pal/src/host/Linux-SGX/signer/enclave-key.pem + +ifeq ($(DEBUG),1) +GRAMINE_LOG_LEVEL = debug +else +GRAMINE_LOG_LEVEL = error +endif + +.PHONY: all +all: python.manifest +ifeq ($(SGX),1) +all: python.manifest.sgx python.sig python.token +endif + +BERT_DATASET = https://storage.googleapis.com/bert_models/2019_05_30/wwm_uncased_L-24_H-1024_A-16.zip +SQUAAD_DATASET = https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json +CHECKPOINTS = https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/bert_large_checkpoints.zip +BERT_FP32_MODEL = 
https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_4_0/fp32_bert_squad.pb + +collateral: + apt install unzip + test -d models || git clone https://github.com/IntelAI/models.git + mkdir -p data + test -f data/wwm_uncased_L-24_H-1024_A-16.zip || wget $(BERT_DATASET) -P data/ + test -d data/wwm_uncased_L-24_H-1024_A-16 || unzip data/wwm_uncased_L-24_H-1024_A-16.zip -d data + test -f data/wwm_uncased_L-24_H-1024_A-16/dev-v1.1.json || wget $(SQUAAD_DATASET) -P data/wwm_uncased_L-24_H-1024_A-16 + test -f data/bert_large_checkpoints.zip || wget $(CHECKPOINTS) -P data/ + test -d data/bert_large_checkpoints || unzip data/bert_large_checkpoints.zip -d data + test -f data/fp32_bert_squad.pb || wget $(BERT_FP32_MODEL) -P data/ + +python.manifest: python.manifest.template collateral + gramine-manifest \ + -Dlog_level=$(GRAMINE_LOG_LEVEL) \ + -Darch_libdir=$(ARCH_LIBDIR) \ + -Dentrypoint=$(realpath $(shell sh -c "command -v python3")) \ + -Dpythondistpath=$(PYTHONDISTPATH) \ + $< >$@ + +python.manifest.sgx: python.manifest + @test -s $(SGX_SIGNER_KEY) || \ + { echo "SGX signer private key was not found, please specify SGX_SIGNER_KEY!"; exit 1; } + gramine-sgx-sign \ + --key $(SGX_SIGNER_KEY) \ + --manifest $< --output $@ + +python.sig: python.manifest.sgx + +python.token: python.sig + gramine-sgx-get-token --output $@ --sig $< + +.PHONY: clean +clean: + $(RM) *.manifest *.manifest.sgx *.token *.sig + +.PHONY: distclean +distclean: clean + $(RM) -r models/ data/ diff --git a/tensorflow/BERT/python.manifest.template b/tensorflow/BERT/python.manifest.template new file mode 100755 index 0000000..65a2241 --- /dev/null +++ b/tensorflow/BERT/python.manifest.template @@ -0,0 +1,68 @@ +libos.entrypoint = "{{ entrypoint }}" +loader.preload = "file:{{ gramine.libos }}" + +loader.log_level = "{{ log_level }}" + +loader.insecure__use_cmdline_argv = true +loader.insecure__use_host_env = true +loader.insecure__disable_aslr = true + +loader.env.LD_LIBRARY_PATH = "{{ 
python.stdlib }}/lib:/lib:{{ arch_libdir }}:/usr/lib:/usr/{{ arch_libdir }}" + +loader.pal_internal_mem_size = "512M" + +fs.mount.lib.type = "chroot" +fs.mount.lib.path = "/lib" +fs.mount.lib.uri = "file:{{ gramine.runtimedir() }}" + +fs.mount.lib2.type = "chroot" +fs.mount.lib2.path = "{{ arch_libdir }}" +fs.mount.lib2.uri = "file:{{ arch_libdir }}" + +fs.mount.usr.type = "chroot" +fs.mount.usr.path = "/usr" +fs.mount.usr.uri = "file:/usr" + +fs.mount.pyhome.type = "chroot" +fs.mount.pyhome.path = "{{ python.stdlib }}" +fs.mount.pyhome.uri = "file:{{ python.stdlib }}" + +fs.mount.pydisthome.type = "chroot" +fs.mount.pydisthome.path = "{{ python.distlib }}" +fs.mount.pydisthome.uri = "file:{{ python.distlib }}" + +fs.mount.pydistpath.type = "chroot" +fs.mount.pydistpath.path = "{{ pythondistpath }}" +fs.mount.pydistpath.uri = "file:{{ pythondistpath }}" + +fs.mount.tmp.type = "chroot" +fs.mount.tmp.path = "/tmp" +fs.mount.tmp.uri = "file:/tmp" + +fs.mount.etc.type = "chroot" +fs.mount.etc.path = "/etc" +fs.mount.etc.uri = "file:/etc" + +sgx.enclave_size = "32G" +sgx.thread_num = 256 +sgx.preheat_enclave = true +sgx.nonpie_binary = true + +sgx.trusted_files = [ + "file:{{ gramine.runtimedir() }}/", + "file:{{ arch_libdir }}/", + "file:/usr/{{ arch_libdir }}/", + "file:{{ entrypoint }}/", + "file:{{ python.stdlib }}/", + "file:{{ python.distlib }}/", + "file:{{ pythondistpath }}/", +] + +sgx.allowed_files = [ + "file:/tmp/", + "file:/etc/", + "file:output/", + "file:models/", + "file:data/", + "file:root/.keras/keras.json", +] diff --git a/tensorflow/README.md b/tensorflow/README.md new file mode 100755 index 0000000..5e3d5a8 --- /dev/null +++ b/tensorflow/README.md @@ -0,0 +1,114 @@ +## Inference on TensorFlow BERT and ResNet50 models +This directory contains steps and artifacts to run inference with TensorFlow BERT and ResNet50 +sample workloads on Gramine. Specifically, both these examples use pre-trained models to run +inference. 
+ +### Bidirectional Encoder Representations from Transformers (BERT): +BERT is a method of pre-training language representations and then using that trained model for +downstream NLP tasks like 'question answering'. BERT is an unsupervised, deeply bidirectional system +for pre-training NLP. +In this BERT sample, we use **BERT-Large, Uncased (Whole Word Masking)** model and perform int8 +inference. More details about BERT can be found at https://github.com/google-research/bert. + +### Residual Network (ResNet): +ResNet50 is a convolutional neural network that is 50 layers deep. +In this ResNet50 (v1.5) sample, we use a pre-trained model and perform int8 inference. +More details about ResNet50 can be found at https://github.com/IntelAI/models/tree/icx-launch-public/benchmarks/image_recognition/tensorflow/resnet50v1_5. + +## Pre-requisites +- Upgrade pip/pip3. +- Install TensorFlow using ``pip install intel-tensorflow-avx512==2.4.0``. + +## Build BERT or ResNet50 samples +- To build BERT sample, do ``cd BERT``. To build ResNet50 sample, do ``cd ResNet50``. +- To clean the sample, do ``make clean`` +- To clean and remove downloaded models and datasets, do ``make distclean`` +- To build the non-SGX version, do ``make PYTHONDISTPATH=path_to_python_dist_packages/`` +- To build the SGX version, do ``make PYTHONDISTPATH=path_to_python_dist_packages/ SGX=1`` +- Typically, ``path_to_python_dist_packages`` is ``/usr/local/lib/python3.6/dist-packages``, but can +change based on python's installation directory. +- Keras settings are configured in the file ``root/.keras/keras.json``. It is configured to use +TensorFlow as backend. + +**WARNING:** Building BERT sample downloads about 5GB of data. 
+ +## Run inference on BERT model +- To run int8 inference on ``gramine-sgx`` (SGX version): +``` +OMP_NUM_THREADS=36 KMP_AFFINITY=granularity=fine,verbose,compact,1,0 taskset -c 0-35 gramine-sgx \ +./python models/models/language_modeling/tensorflow/bert_large/inference/run_squad.py \ +--init_checkpoint=data/bert_large_checkpoints/model.ckpt-3649 \ +--vocab_file=data/wwm_uncased_L-24_H-1024_A-16/vocab.txt \ +--bert_config_file=data/wwm_uncased_L-24_H-1024_A-16/bert_config.json \ +--predict_file=data/wwm_uncased_L-24_H-1024_A-16/dev-v1.1.json \ +--precision=int8 \ +--output_dir=output/bert-squad-output \ +--predict_batch_size=32 \ +--experimental_gelu=True \ +--optimized_softmax=True \ +--input_graph=data/asymmetric_per_channel_bert_int8.pb \ +--do_predict=True --mode=benchmark \ +--inter_op_parallelism_threads=1 \ +--intra_op_parallelism_threads=36 +``` +- To run int8 inference on ``gramine-direct`` (non-SGX version), replace ``gramine-sgx`` with +``gramine-direct`` in the above command. +- To run int8 inference on native baremetal (outside Gramine), replace ``gramine-sgx ./python`` with +``python3`` in the above command. + +## Run inference on ResNet50 model +- To run inference on ``gramine-sgx`` (SGX version): +``` +OMP_NUM_THREADS=36 KMP_AFFINITY=granularity=fine,verbose,compact,1,0 taskset -c 0-35 gramine-sgx \ +./python models/models/image_recognition/tensorflow/resnet50v1_5/inference/eval_image_classifier_inference.py \ +--input-graph=resnet50v1_5_int8_pretrained_model.pb \ +--num-inter-threads=1 \ +--num-intra-threads=36 \ +--batch-size=32 \ +--warmup-steps=50 \ +--steps=500 +``` +- To run inference on ``gramine-direct`` (non-SGX version), replace ``gramine-sgx`` with +``gramine-direct`` in the above command. +- To run inference on native baremetal (outside Gramine), replace ``gramine-sgx ./python`` with +``python3`` in the above command. + +## Notes on optimal performance +Above commands are for a 36 core system. 
Please set the following options accordingly for optimal +performance: + +- Assuming that X is the number of cores per socket, set `OMP_NUM_THREADS=X`, + `intra_op_parallelism_threads=X` for BERT and `num_intra_threads=X` for ResNet50. +- Specify the whole range of cores available on one of the sockets in `taskset`. +- If hyperthreading is enabled: use ``KMP_AFFINITY=granularity=fine,verbose,compact,1,0`` +- If hyperthreading is disabled: use ``KMP_AFFINITY=granularity=fine,verbose,compact`` +- Note that `OMP_NUM_THREADS` sets the maximum number of threads to + use for OpenMP parallel regions, and `KMP_AFFINITY` binds OpenMP threads + to physical processing units. +- The options `batch-size`, `warmup-steps` and `steps` can be varied for ResNet50 sample. +- To get the number of cores per socket, do ``lscpu | grep 'Core(s) per socket'``. + +## Performance considerations +- Linux systems have CPU frequency scaling governor that helps the system to scale the CPU frequency +to achieve best performance or to save power based on the requirement. +To set the CPU frequency scaling governor to performance mode: + + - ``for ((i=0; i<$(nproc); i++)); do echo 'performance' > /sys/devices/system/cpu/cpu$i/cpufreq/scaling_governor; done`` + +- Preheat manifest option pre-faults the enclave memory and moves the performance penalty to +gramine-sgx invocation (before the workload starts execution). +To use preheat option, add ``sgx.preheat_enclave = true`` to the manifest template. +- TCMalloc and mimalloc are memory allocator libraries from Google and Microsoft that can help + improve performance significantly based on the workloads. At any point, only one of these + allocators can be used. + - TCMalloc (Please update the binary location and name if different from default): + - Install tcmalloc: ``sudo apt-get install google-perftools`` + - Add the following lines in the manifest template and rebuild the sample. 
+ - ``loader.env.LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`` + - ``sgx.trusted_files.libtcmalloc = "file:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`` + - ``sgx.trusted_files.libunwind = "file:/usr/lib/x86_64-linux-gnu/libunwind.so.8"`` + - mimalloc (Please update the binary location and name if different from default): + - Install mimalloc using the steps from https://github.com/microsoft/mimalloc + - Add the following lines in the manifest template and rebuild the sample. + - ``loader.env.LD_PRELOAD = "/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`` + - ``sgx.trusted_files.libmimalloc = "file:/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`` diff --git a/tensorflow/ResNet50/.gitignore b/tensorflow/ResNet50/.gitignore new file mode 100644 index 0000000..e3e7bc8 --- /dev/null +++ b/tensorflow/ResNet50/.gitignore @@ -0,0 +1,2 @@ +/models/ +/resnet50v1_5_int8_pretrained_model.pb diff --git a/tensorflow/ResNet50/Makefile b/tensorflow/ResNet50/Makefile new file mode 100755 index 0000000..bc4edc9 --- /dev/null +++ b/tensorflow/ResNet50/Makefile @@ -0,0 +1,49 @@ +# ResNet50 sample for Tensorflow + +ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine) +SGX_SIGNER_KEY ?= ../../../Pal/src/host/Linux-SGX/signer/enclave-key.pem + +ifeq ($(DEBUG),1) +GRAMINE_LOG_LEVEL = debug +else +GRAMINE_LOG_LEVEL = error +endif + +.PHONY: all collateral +all: python.manifest +ifeq ($(SGX),1) +all: python.manifest.sgx python.sig python.token +endif + +collateral: + test -d models || git clone https://github.com/IntelAI/models.git + test -f resnet50v1_5_int8_pretrained_model.pb || wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/resnet50v1_5_int8_pretrained_model.pb + +python.manifest: python.manifest.template collateral + gramine-manifest \ + -Dlog_level=$(GRAMINE_LOG_LEVEL) \ + -Darch_libdir=$(ARCH_LIBDIR) \ + -Dentrypoint=$(realpath $(shell sh -c "command -v python3")) \ + -Dpythondistpath=$(PYTHONDISTPATH) \ + $< >$@ + +python.manifest.sgx: 
python.manifest + @test -s $(SGX_SIGNER_KEY) || \ + { echo "SGX signer private key was not found, please specify SGX_SIGNER_KEY!"; exit 1; } + gramine-sgx-sign \ + --key $(SGX_SIGNER_KEY) \ + --manifest python.manifest \ + --output $@ + +python.sig: python.manifest.sgx + +python.token: python.sig + gramine-sgx-get-token --output $@ --sig $< + +.PHONY: clean +clean: + $(RM) *.manifest *.manifest.sgx *.token *.sig + +.PHONY: distclean +distclean: clean + $(RM) -r models/ resnet50v1_5_int8_pretrained_model.pb diff --git a/tensorflow/ResNet50/python.manifest.template b/tensorflow/ResNet50/python.manifest.template new file mode 100755 index 0000000..563095a --- /dev/null +++ b/tensorflow/ResNet50/python.manifest.template @@ -0,0 +1,72 @@ +libos.entrypoint = "{{ entrypoint }}" +loader.preload = "file:{{ gramine.libos }}" + +loader.log_level = "{{ log_level }}" + +loader.insecure__use_cmdline_argv = true +loader.insecure__use_host_env = true +loader.insecure__disable_aslr = true + +loader.env.LD_LIBRARY_PATH = "{{ python.stdlib }}/lib:/lib:{{ arch_libdir }}:/usr/lib:/usr/{{ arch_libdir }}" + +loader.pal_internal_mem_size = "512M" + +fs.mount.lib.type = "chroot" +fs.mount.lib.path = "/lib" +fs.mount.lib.uri = "file:{{ gramine.runtimedir() }}" + +fs.mount.lib2.type = "chroot" +fs.mount.lib2.path = "{{ arch_libdir }}" +fs.mount.lib2.uri = "file:{{ arch_libdir }}" + +fs.mount.usr.type = "chroot" +fs.mount.usr.path = "/usr" +fs.mount.usr.uri = "file:/usr" + +fs.mount.bin.type = "chroot" +fs.mount.bin.path = "/bin" +fs.mount.bin.uri = "file:/bin" + +fs.mount.pyhome.type = "chroot" +fs.mount.pyhome.path = "{{ python.stdlib }}" +fs.mount.pyhome.uri = "file:{{ python.stdlib }}" + +fs.mount.pydisthome.type = "chroot" +fs.mount.pydisthome.path = "{{ python.distlib }}" +fs.mount.pydisthome.uri = "file:{{ python.distlib }}" + +fs.mount.pydistpath.type = "chroot" +fs.mount.pydistpath.path = "{{ pythondistpath }}" +fs.mount.pydistpath.uri = "file:{{ pythondistpath }}" + 
+fs.mount.tmp.type = "chroot" +fs.mount.tmp.path = "/tmp" +fs.mount.tmp.uri = "file:/tmp" + +fs.mount.etc.type = "chroot" +fs.mount.etc.path = "/etc" +fs.mount.etc.uri = "file:/etc" + +sgx.enclave_size = "32G" +sgx.thread_num = 300 +sgx.preheat_enclave = true +sgx.nonpie_binary = true + +sgx.trusted_files = [ + "file:{{ gramine.runtimedir() }}/", + "file:{{ arch_libdir }}/", + "file:/usr/{{ arch_libdir }}/", + "file:resnet50v1_5_int8_pretrained_model.pb", + "file:{{ entrypoint }}/", + "file:{{ python.stdlib }}/", + "file:{{ python.distlib }}/", + "file:{{ pythondistpath }}/", +] + +sgx.allowed_files = [ + "file:/tmp/", + "file:/etc/", + "file:/proc/", + "file:models/", + "file:root/.keras/keras.json", +] From c05a2de3c93b63af92812262e20d9b8b070a5f94 Mon Sep 17 00:00:00 2001 From: Satyanaraya Illa Date: Wed, 1 Dec 2021 13:07:05 +0530 Subject: [PATCH 2/3] fixup! Add TensorFlow examples - ResNet50 and BERT models Signed-off-by: Satyanaraya Illa --- tensorflow/BERT/python.manifest.template | 1 - tensorflow/README.md | 15 ++++++--------- tensorflow/ResNet50/python.manifest.template | 1 - 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/tensorflow/BERT/python.manifest.template b/tensorflow/BERT/python.manifest.template index 65a2241..758a7fb 100755 --- a/tensorflow/BERT/python.manifest.template +++ b/tensorflow/BERT/python.manifest.template @@ -64,5 +64,4 @@ sgx.allowed_files = [ "file:output/", "file:models/", "file:data/", - "file:root/.keras/keras.json", ] diff --git a/tensorflow/README.md b/tensorflow/README.md index 5e3d5a8..0e63b8d 100755 --- a/tensorflow/README.md +++ b/tensorflow/README.md @@ -27,8 +27,6 @@ More details about ResNet50 can be found at https://github.com/IntelAI/models/tr - To build the SGX version, do ``make PYTHONDISTPATH=path_to_python_dist_packages/ SGX=1`` - Typically, ``path_to_python_dist_packages`` is ``/usr/local/lib/python3.6/dist-packages``, but can change based on python's installation directory. 
-- Keras settings are configured in the file ``root/.keras/keras.json``. It is configured to use -TensorFlow as backend. **WARNING:** Building BERT sample downloads about 5GB of data. @@ -103,12 +101,11 @@ To use preheat option, add ``sgx.preheat_enclave = true`` to the manifest templa allocators can be used. - TCMalloc (Please update the binary location and name if different from default): - Install tcmalloc: ``sudo apt-get install google-perftools`` - - Add the following lines in the manifest template and rebuild the sample. - - ``loader.env.LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`` - - ``sgx.trusted_files.libtcmalloc = "file:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`` - - ``sgx.trusted_files.libunwind = "file:/usr/lib/x86_64-linux-gnu/libunwind.so.8"`` + - To use tcmalloc, + - Add ``loader.env.LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`` to the manifest template. + - Add ``"file:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`` and ``"file:/usr/lib/x86_64-linux-gnu/libunwind.so.8"`` to ``sgx.trusted_files``. - mimalloc (Please update the binary location and name if different from default): - Install mimalloc using the steps from https://github.com/microsoft/mimalloc - - Add the following lines in the manifest template and rebuild the sample. - - ``loader.env.LD_PRELOAD = "/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`` - - ``sgx.trusted_files.libmimalloc = "file:/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`` + - To use mimalloc, + - Add ``loader.env.LD_PRELOAD = "/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`` to the manifest template. + - Add ``"file:/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`` to ``sgx.trusted_files``. 
diff --git a/tensorflow/ResNet50/python.manifest.template b/tensorflow/ResNet50/python.manifest.template index 563095a..7fae773 100755 --- a/tensorflow/ResNet50/python.manifest.template +++ b/tensorflow/ResNet50/python.manifest.template @@ -68,5 +68,4 @@ sgx.allowed_files = [ "file:/etc/", "file:/proc/", "file:models/", - "file:root/.keras/keras.json", ] From 6b68ed89879696437f2aac7f9f697ac067cdcae8 Mon Sep 17 00:00:00 2001 From: Satyanaraya Illa Date: Mon, 20 Dec 2021 15:50:28 +0530 Subject: [PATCH 3/3] fixup! Add TensorFlow examples - ResNet50 and BERT models Signed-off-by: Satyanaraya Illa --- tensorflow/BERT/Makefile | 2 +- tensorflow/BERT/python.manifest.template | 5 +- tensorflow/README.md | 68 ++++++++++++++------ tensorflow/ResNet50/Makefile | 2 +- tensorflow/ResNet50/python.manifest.template | 6 +- 5 files changed, 55 insertions(+), 28 deletions(-) diff --git a/tensorflow/BERT/Makefile b/tensorflow/BERT/Makefile index ea887ef..bf7af5d 100755 --- a/tensorflow/BERT/Makefile +++ b/tensorflow/BERT/Makefile @@ -1,4 +1,4 @@ -# BERT sample for Tensorflow +# BERT sample for TensorFlow ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine) SGX_SIGNER_KEY ?= ../../../Pal/src/host/Linux-SGX/signer/enclave-key.pem diff --git a/tensorflow/BERT/python.manifest.template b/tensorflow/BERT/python.manifest.template index 758a7fb..017305b 100755 --- a/tensorflow/BERT/python.manifest.template +++ b/tensorflow/BERT/python.manifest.template @@ -1,5 +1,5 @@ libos.entrypoint = "{{ entrypoint }}" -loader.preload = "file:{{ gramine.libos }}" +loader.entrypoint = "file:{{ gramine.libos }}" loader.log_level = "{{ log_level }}" @@ -52,7 +52,8 @@ sgx.trusted_files = [ "file:{{ gramine.runtimedir() }}/", "file:{{ arch_libdir }}/", "file:/usr/{{ arch_libdir }}/", - "file:{{ entrypoint }}/", + "file:{{ gramine.libos }}", + "file:{{ entrypoint }}", "file:{{ python.stdlib }}/", "file:{{ python.distlib }}/", "file:{{ pythondistpath }}/", diff --git a/tensorflow/README.md b/tensorflow/README.md 
index 0e63b8d..78080f5 100755 --- a/tensorflow/README.md +++ b/tensorflow/README.md @@ -51,7 +51,7 @@ OMP_NUM_THREADS=36 KMP_AFFINITY=granularity=fine,verbose,compact,1,0 taskset -c ``` - To run int8 inference on ``gramine-direct`` (non-SGX version), replace ``gramine-sgx`` with ``gramine-direct`` in the above command. -- To run int8 inference on native baremetal (outside Gramine), replace ``gramine-sgx ./python`` with +- To run int8 inference natively (outside Gramine), replace ``gramine-sgx ./python`` with ``python3`` in the above command. ## Run inference on ResNet50 model @@ -68,7 +68,7 @@ OMP_NUM_THREADS=36 KMP_AFFINITY=granularity=fine,verbose,compact,1,0 taskset -c ``` - To run inference on ``gramine-direct`` (non-SGX version), replace ``gramine-sgx`` with ``gramine-direct`` in the above command. -- To run inference on native baremetal (outside Gramine), replace ``gramine-sgx ./python`` with +- To run inference natively (outside Gramine), replace ``gramine-sgx ./python`` with ``python3`` in the above command. ## Notes on optimal performance @@ -87,25 +87,51 @@ performance: - To get the number of cores per socket, do ``lscpu | grep 'Core(s) per socket'``. ## Performance considerations -- Linux systems have CPU frequency scaling governor that helps the system to scale the CPU frequency -to achieve best performance or to save power based on the requirement. -To set the CPU frequency scaling governor to performance mode: +### CPU frequency scaling - - ``for ((i=0; i<$(nproc); i++)); do echo 'performance' > /sys/devices/system/cpu/cpu$i/cpufreq/scaling_governor; done`` +Linux systems have CPU frequency scaling governor that helps the system to scale the CPU frequency +to achieve best performance or to save power based on the requirement. To achieve the best +performance, please set the CPU frequency scaling governor to `performance` mode. 
+ +```bash +for ((i=0; i<$(nproc); i++)); do + echo 'performance' > /sys/devices/system/cpu/cpu$i/cpufreq/scaling_governor; +done +``` + +### Manifest options for performance - Preheat manifest option pre-faults the enclave memory and moves the performance penalty to -gramine-sgx invocation (before the workload starts execution). -To use preheat option, add ``sgx.preheat_enclave = true`` to the manifest template. -- TCMalloc and mimalloc are memory allocator libraries from Google and Microsoft that can help - improve performance significantly based on the workloads. At any point, only one of these - allocators can be used. - - TCMalloc (Please update the binary location and name if different from default): - - Install tcmalloc: ``sudo apt-get install google-perftools`` - - To use tcmalloc, - - Add ``loader.env.LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`` to the manifest template. - - Add ``"file:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"`` and ``"file:/usr/lib/x86_64-linux-gnu/libunwind.so.8"`` to ``sgx.trusted_files``. - - mimalloc (Please update the binary location and name if different from default): - - Install mimalloc using the steps from https://github.com/microsoft/mimalloc - - To use mimalloc, - - Add ``loader.env.LD_PRELOAD = "/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`` to the manifest template. - - Add ``"file:/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"`` to ``sgx.trusted_files``. +Gramine-SGX startup (before the workload starts executing). To use the preheat option, make sure +that `sgx.preheat_enclave = true` is added to the manifest template. + +### Memory allocator libraries + +TCMalloc and mimalloc are memory allocator libraries from Google and Microsoft that can help +improve performance significantly based on the workloads. Only one of these +allocators can be used. + +#### TCMalloc + +(Please update the binary location and name if different from default.) 
+- Install tcmalloc: `sudo apt-get install google-perftools` +- Modify the manifest template file: + - Add `loader.env.LD_PRELOAD = "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"` + - Append the entries below to `sgx.trusted_files`: + - `"file:/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4"` + - `"file:/usr/lib/x86_64-linux-gnu/libunwind.so.8"` +- Save the manifest template and rebuild this example. + +#### mimalloc + +(Please update the binary location and name if different from default.) +- Install mimalloc using the steps from https://github.com/microsoft/mimalloc +- Modify the manifest template file: + - Add the `/usr/local` FS mount point: + - `fs.mount.usr_local.type = "chroot"` + - `fs.mount.usr_local.path = "/usr/local"` + - `fs.mount.usr_local.uri = "file:/usr/local"` + - Add `loader.env.LD_PRELOAD = "/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"` + - Append the entry below to `sgx.trusted_files`: + - `"file:/usr/local/lib/mimalloc-1.7/libmimalloc.so.1.7"` +- Save the manifest template and rebuild this example. 
diff --git a/tensorflow/ResNet50/Makefile b/tensorflow/ResNet50/Makefile index bc4edc9..6ce2252 100755 --- a/tensorflow/ResNet50/Makefile +++ b/tensorflow/ResNet50/Makefile @@ -1,4 +1,4 @@ -# ResNet50 sample for Tensorflow +# ResNet50 sample for TensorFlow ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine) SGX_SIGNER_KEY ?= ../../../Pal/src/host/Linux-SGX/signer/enclave-key.pem diff --git a/tensorflow/ResNet50/python.manifest.template b/tensorflow/ResNet50/python.manifest.template index 7fae773..6aef10d 100755 --- a/tensorflow/ResNet50/python.manifest.template +++ b/tensorflow/ResNet50/python.manifest.template @@ -1,5 +1,5 @@ +loader.entrypoint = "file:{{ gramine.libos }}" libos.entrypoint = "{{ entrypoint }}" -loader.preload = "file:{{ gramine.libos }}" loader.log_level = "{{ log_level }}" @@ -57,7 +57,8 @@ sgx.trusted_files = [ "file:{{ arch_libdir }}/", "file:/usr/{{ arch_libdir }}/", "file:resnet50v1_5_int8_pretrained_model.pb", - "file:{{ entrypoint }}/", + "file:{{ gramine.libos }}", + "file:{{ entrypoint }}", "file:{{ python.stdlib }}/", "file:{{ python.distlib }}/", "file:{{ pythondistpath }}/", @@ -66,6 +67,5 @@ sgx.trusted_files = [ sgx.allowed_files = [ "file:/tmp/", "file:/etc/", - "file:/proc/", "file:models/", ]