From 147ba0f9c3fe4fe2055906ff9d796fcb1b513312 Mon Sep 17 00:00:00 2001 From: Dmitrii Kuvaiskii Date: Thu, 25 Jul 2024 23:06:39 -0700 Subject: [PATCH] Add Candle ML framework example Candle is a minimalist ML framework for Rust with a focus on performance and ease of use. This commit adds the Quantized LLaMA example. Signed-off-by: Dmitrii Kuvaiskii --- candle/.gitignore | 6 +++ candle/Makefile | 59 +++++++++++++++++++++++ candle/README.md | 27 +++++++++++ candle/candle_quantized.manifest.template | 38 +++++++++++++++ 4 files changed, 130 insertions(+) create mode 100644 candle/.gitignore create mode 100644 candle/Makefile create mode 100644 candle/README.md create mode 100644 candle/candle_quantized.manifest.template diff --git a/candle/.gitignore b/candle/.gitignore new file mode 100644 index 0000000..6f5dff6 --- /dev/null +++ b/candle/.gitignore @@ -0,0 +1,6 @@ +/candle_quantized +/src + +# model +/*.bin +/*.json diff --git a/candle/Makefile b/candle/Makefile new file mode 100644 index 0000000..83c0dad --- /dev/null +++ b/candle/Makefile @@ -0,0 +1,59 @@ +# Copyright (C) 2024 Gramine contributors +# SPDX-License-Identifier: BSD-3-Clause + +ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine) + +ifeq ($(DEBUG),1) +GRAMINE_LOG_LEVEL = debug +else +GRAMINE_LOG_LEVEL = error +endif + +SRCDIR = src + +.PHONY: all +all: candle_quantized candle_quantized.manifest +ifeq ($(SGX),1) +all: candle_quantized.manifest.sgx candle_quantized.sig +endif + +llama-2-7b.ggmlv3.q4_0.bin: + ../common_tools/download --output $@ \ + --sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \ + --url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@ + +tokenizer.json: + ../common_tools/download --output $@ \ + --sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \ + --url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@ + +$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin 
tokenizer.json + mkdir -p $(SRCDIR) && cd $(SRCDIR) && \ + git clone --depth 1 --branch 0.6.0 https://github.com/huggingface/candle.git candle_quantized && \ + cd candle_quantized && \ + cargo build --example quantized --release + +candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized + cp $< $@ + +candle_quantized.manifest: candle_quantized.manifest.template + gramine-manifest \ + -Dlog_level=$(GRAMINE_LOG_LEVEL) \ + -Darch_libdir=$(ARCH_LIBDIR) \ + $< > $@ + +candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign + @: + +.INTERMEDIATE: candle_quantized_sgx_sign +candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized + gramine-sgx-sign \ + --manifest $< \ + --output $<.sgx +.PHONY: clean +clean: + $(RM) *.token *.sig *.manifest.sgx *.manifest candle_quantized + +.PHONY: distclean +distclean: clean + $(RM) -r $(SRCDIR) *.tar.gz *.bin *.json diff --git a/candle/README.md b/candle/README.md new file mode 100644 index 0000000..ecabedb --- /dev/null +++ b/candle/README.md @@ -0,0 +1,27 @@ +# Candle + +[Candle](https://github.com/huggingface/candle) is a minimalist ML framework for +Rust with a focus on performance (including GPU support) and ease of use. + +This directory contains the Makefile and the template manifest for the most +recent version of Candle as of this writing (v0.6.0). + +# Warning + +The `candle_quantized` app will download ~4GB of data (model + tokenizer). This +happens automatically in the Makefile. 
+ +# Quick Start + +```sh +# build Candle (uses Rust Cargo) and the final manifest +make SGX=1 + +# run Quantized LLaMA (quantized version of the LLaMA model) +# note that for Gramine, the cmdline args are already defined in the manifest file +# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples +RAYON_NUM_THREADS=36 ./candle_quantized \ + --model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200 +RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized +RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized +``` diff --git a/candle/candle_quantized.manifest.template b/candle/candle_quantized.manifest.template new file mode 100644 index 0000000..a2d4894 --- /dev/null +++ b/candle/candle_quantized.manifest.template @@ -0,0 +1,38 @@ +# Copyright (C) 2024 Gramine contributors +# SPDX-License-Identifier: BSD-3-Clause + +loader.entrypoint = "file:{{ gramine.libos }}" +libos.entrypoint = "/candle_quantized" + +loader.log_level = "{{ log_level }}" + +loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}" +loader.env.RAYON_NUM_THREADS = { passthrough = true } + +loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin", + "--tokenizer", "tokenizer.json", "--sample-len", "200" ] + +fs.mounts = [ + { path = "/candle_quantized", uri = "file:candle_quantized" }, + { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" }, + { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" }, + + { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" }, + { path = "/tokenizer.json", uri = "file:tokenizer.json" }, +] + +sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }} +sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }} +sgx.enclave_size = "32G" + +sgx.trusted_files = [ + "file:candle_quantized", + "file:{{ gramine.libos }}", + "file:{{ gramine.runtimedir() }}/", + "file:{{ arch_libdir }}/libcrypto.so.3", + "file:{{ 
arch_libdir }}/libgcc_s.so.1", + "file:{{ arch_libdir }}/libssl.so.3", + + "file:llama-2-7b.ggmlv3.q4_0.bin", + "file:tokenizer.json", +]