
Commit 88d4425

committed
- move chat template related code from worker to simulator
- fix makefile to allow a valid python build process

Signed-off-by: Maya Barnea <mayab@il.ibm.com>

1 parent 218fa5f · commit 88d4425

File tree: 5 files changed, +186 −81 lines


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -11,3 +11,4 @@ pkg/dataset/.llm-d
 pkg/llm-d-inference-sim/tests-tmp/
 pkg/llm-d-inference-sim/.llm-d/
 .llm-d/
+.venv

Makefile

Lines changed: 140 additions & 45 deletions
@@ -31,6 +31,8 @@ IMAGE_REGISTRY ?= ghcr.io/llm-d
 IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(PROJECT_NAME)
 SIM_TAG ?= dev
 IMG = $(IMAGE_TAG_BASE):$(SIM_TAG)
+POD_IP ?= pod
+export POD_IP

 ifeq ($(TARGETOS),darwin)
 ifeq ($(TARGETARCH),amd64)
@@ -60,20 +62,61 @@ export PKG_CONFIG_PATH=/usr/lib/pkgconfig
 PYTHON_VERSION := 3.12

 # Unified Python configuration detection. This block runs once.
-PYTHON_CONFIG ?= $(shell command -v python$(PYTHON_VERSION)-config || command -v python3-config)
+# It prioritizes python-config, then pkg-config, for reliability.
+UNAME_S := $(shell uname -s)
+ifeq ($(UNAME_S),Darwin)
+  # macOS: Find Homebrew's python-config script for the most reliable flags.
+  BREW_PREFIX := $(shell command -v brew >/dev/null 2>&1 && brew --prefix python@$(PYTHON_VERSION) 2>/dev/null)
+  PYTHON_CONFIG := $(BREW_PREFIX)/bin/python$(PYTHON_VERSION)-config
+  ifneq ($(shell $(PYTHON_CONFIG) --cflags 2>/dev/null),)
+    PYTHON_CFLAGS := $(shell $(PYTHON_CONFIG) --cflags)
+    # Use --ldflags --embed to get all necessary flags for linking
+    PYTHON_LDFLAGS := $(shell $(PYTHON_CONFIG) --ldflags --embed)
+    PYTHON_LIBS :=
+  else
+    $(error "Could not execute 'python$(PYTHON_VERSION)-config' from Homebrew. Please ensure Python is installed correctly with: 'brew install python@$(PYTHON_VERSION)'")
+  endif
+else ifeq ($(UNAME_S),Linux)
+  # Linux: Use standard system tools to find flags.
+  PYTHON_CONFIG := $(shell command -v python$(PYTHON_VERSION)-config || command -v python3-config)
+  ifneq ($(shell $(PYTHON_CONFIG) --cflags 2>/dev/null),)
+    # Use python-config if available and correct
+    PYTHON_CFLAGS := $(shell $(PYTHON_CONFIG) --cflags)
+    PYTHON_LDFLAGS := $(shell $(PYTHON_CONFIG) --ldflags --embed)
+    PYTHON_LIBS :=
+  else ifneq ($(shell pkg-config --cflags python-$(PYTHON_VERSION) 2>/dev/null),)
+    # Fallback to pkg-config
+    PYTHON_CFLAGS := $(shell pkg-config --cflags python-$(PYTHON_VERSION))
+    PYTHON_LDFLAGS := $(shell pkg-config --libs python-$(PYTHON_VERSION))
+    PYTHON_LIBS :=
+  else
+    $(error "Python $(PYTHON_VERSION) development headers not found. Please install with: 'sudo apt install python$(PYTHON_VERSION)-dev' or 'sudo dnf install python$(PYTHON_VERSION)-devel'")
+  endif
+else
+  $(error "Unsupported OS: $(UNAME_S)")
+endif
+
+# Final CGO flags with all dependencies
+CGO_CFLAGS_FINAL := $(PYTHON_CFLAGS) -Ilib
+CGO_LDFLAGS_FINAL := $(PYTHON_LDFLAGS) $(PYTHON_LIBS) -Llib -ltokenizers -ldl -lm
+
+VENV_DIR ?= $(shell pwd)/.venv
+VENV_BIN := $(VENV_DIR)/bin
+VENV_SRC := $(VENV_DIR)/python

-CGO_CFLAGS := $(shell $(PYTHON_CONFIG) --cflags --embed)
-CGO_LDFLAGS := $(shell $(PYTHON_CONFIG) --ldflags --embed)
+PYTHON_EXE := $(shell command -v python$(PYTHON_VERSION) || command -v python3)

 GOMODCACHE := $(shell go env GOMODCACHE)
 KV_CACHE_MGR_VERSION := $(shell go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager)
 KV_CACHE_MGR_PATH := $(GOMODCACHE)/github.com/llm-d/llm-d-kv-cache-manager@$(KV_CACHE_MGR_VERSION)/pkg/preprocessing/chat_completions
-export PYTHONPATH := $(KV_CACHE_MGR_PATH):$(PYTHONPATH)

-CPATH := $(PYTHON_INCLUDE):$(CPATH)
+# Common environment variables for Go tests and builds
+export CGO_ENABLED=1
+export CGO_CFLAGS=$(CGO_CFLAGS_FINAL)
+export CGO_LDFLAGS=$(CGO_LDFLAGS_FINAL)
+export PYTHONPATH=$(VENV_SRC):$(VENV_DIR)/lib/python$(PYTHON_VERSION)/site-packages

 GO_LDFLAGS := -extldflags '-L$(shell pwd)/lib $(LDFLAGS) $(CGO_LDFLAGS)'
-CGO_ENABLED=1
 TOKENIZER_LIB = lib/libtokenizers.a
 # Extract TOKENIZER_VERSION from Dockerfile
 TOKENIZER_VERSION := $(shell grep '^ARG TOKENIZER_VERSION=' Dockerfile | cut -d'=' -f2)
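Note: with this block the Makefile now exports CGO_ENABLED, CGO_CFLAGS, CGO_LDFLAGS and a venv-based PYTHONPATH for every target, instead of deriving the Python flags from a single python-config invocation. If the detection misbehaves on a particular machine, the detect-python target added further down in this diff prints the resolved Python executable, CFLAGS and LDFLAGS (run "make detect-python"); the exact values naturally depend on the local Python installation.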
@@ -84,7 +127,11 @@ $(TOKENIZER_LIB):
 	## Download the HuggingFace tokenizer bindings.
 	@echo "Downloading HuggingFace tokenizer bindings for version $(TOKENIZER_VERSION)..."
 	mkdir -p lib
-	curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-$(TOKENIZER_ARCH).tar.gz | tar -xz -C lib
+	if [ "$(TARGETOS)" = "darwin" ] && [ "$(TARGETARCH)" = "amd64" ]; then \
+		curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-x86_64.tar.gz | tar -xz -C lib; \
+	else \
+		curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib; \
+	fi
 	ranlib lib/*.a

 ##@ Development
@@ -101,13 +148,18 @@ format: ## Format Go source files
 	@gofmt -l -w $(SRC)

 .PHONY: test
-test: $(GINKGO) install-dependencies ## Run tests
-	@printf "\033[33;1m==== Running tests ====\033[0m\n"
-ifdef GINKGO_FOCUS
-	CGO_ENABLED=1 CGO_CFLAGS="$(CGO_CFLAGS)" $(GINKGO) -ldflags="$(GO_LDFLAGS)" -v -r -- -ginkgo.v -ginkgo.focus="$(GINKGO_FOCUS)"
-else
-	CGO_ENABLED=1 CGO_CFLAGS="$(CGO_CFLAGS)" $(GINKGO) -ldflags="$(GO_LDFLAGS)" -v -r $(TEST_PKG)
-endif
+test: download-tokenizer install-python-deps # download-zmq ## Run unit tests
+	@printf "\033[33;1m==== Running unit tests ====\033[0m\n"
+	if [ -n "$(GINKGO_FOCUS)" ] && [ -z "$(GINKGO_FOCUS_PKG)" ]; then \
+		echo "Error: GINKGO_FOCUS is defined without GINKGO_FOCUS_PKG. Both required or neither."; \
+		exit 1; \
+	elif [ -n "$(GINKGO_FOCUS)$(GINKGO_FOCUS_PKG)" ]; then \
+		echo "Running specific tests"; \
+		go test -v $(GINKGO_FOCUS_PKG) $(if $(GINKGO_FOCUS),-ginkgo.focus="$(GINKGO_FOCUS)",); \
+	else \
+		echo "Running all tests"; \
+		go test -v ./pkg/...; \
+	fi

 .PHONY: post-deploy-test
 post-deploy-test: ## Run post deployment tests
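Usage note for the reworked test target: a plain "make test" runs go test -v ./pkg/..., while a focused run needs both variables, for example make test GINKGO_FOCUS="some spec" GINKGO_FOCUS_PKG=./pkg/llm-d-inference-sim (the focus string and package path here are illustrative only). Setting GINKGO_FOCUS without GINKGO_FOCUS_PKG now fails with an explicit error.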
@@ -122,10 +174,15 @@ lint: $(GOLANGCI_LINT) ## Run lint
 ##@ Build

 .PHONY: build
-build: check-go install-dependencies
+build: check-go download-tokenizer install-python-deps download-zmq
 	@printf "\033[33;1m==== Building ====\033[0m\n"
 	CGO_CFLAGS="$(CGO_CFLAGS)" go build -ldflags="$(GO_LDFLAGS)" -o $(LOCALBIN)/$(PROJECT_NAME) cmd/$(PROJECT_NAME)/main.go

+.PHONY: run
+run: install-python-deps # build ## Run the application locally
+	@printf "\033[33;1m==== Running application ====\033[0m\n"
+	. $(VENV_DIR)/bin/activate && ./bin/$(PROJECT_NAME) $(ARGS)
+
 ##@ Container Build/Push

 .PHONY: image-build
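The new run target activates the virtual environment and then launches ./bin/$(PROJECT_NAME) with whatever is passed in ARGS, e.g. make run ARGS="<simulator flags>". Since the build prerequisite is commented out in this version, the binary has to be built beforehand with make build.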
@@ -234,43 +291,81 @@ print-project-name: ## Print the current project name
 install-hooks: ## Install git hooks
 	git config core.hooksPath hooks

+.PHONY: detect-python
+detect-python: ## Detects Python and prints the configuration.
+	@printf "\033[33;1m==== Python Configuration ====\033[0m\n"
+	@if [ -z "$(PYTHON_EXE)" ]; then \
+		echo "ERROR: Python 3 not found in PATH."; \
+		exit 1; \
+	fi
+	@# Verify the version of the found python executable using its exit code
+	@if ! $(PYTHON_EXE) -c "import sys; sys.exit(0 if sys.version_info[:2] == ($(shell echo $(PYTHON_VERSION) | cut -d. -f1), $(shell echo $(PYTHON_VERSION) | cut -d. -f2)) else 1)"; then \
+		echo "ERROR: Found Python at '$(PYTHON_EXE)' but it is not version $(PYTHON_VERSION)."; \
+		echo "Please ensure 'python$(PYTHON_VERSION)' or a compatible 'python3' is in your PATH."; \
+		exit 1; \
+	fi
+	@echo "Python executable: $(PYTHON_EXE) ($$($(PYTHON_EXE) --version))"
+	@echo "Python CFLAGS: $(PYTHON_CFLAGS)"
+	@echo "Python LDFLAGS: $(PYTHON_LDFLAGS)"
+	@if [ -z "$(PYTHON_CFLAGS)" ]; then \
+		echo "ERROR: Python development headers not found. See installation instructions above."; \
+		exit 1; \
+	fi
+	@printf "\033[33;1m==============================\033[0m\n"
+
+.PHONY: install-python-deps
+install-python-deps: detect-python ## Sets up the Python virtual environment and installs dependencies.
+	@printf "\033[33;1m==== Setting up Python virtual environment in $(VENV_DIR) ====\033[0m\n"
+	@if [ ! -f "$(VENV_BIN)/pip" ]; then \
+		echo "Creating virtual environment..."; \
+		$(PYTHON_EXE) -m venv $(VENV_DIR) || { \
+			echo "ERROR: Failed to create virtual environment."; \
+			echo "Your Python installation may be missing the 'venv' module."; \
+			echo "Try: 'sudo apt install python$(PYTHON_VERSION)-venv' or 'sudo dnf install python$(PYTHON_VERSION)-devel'"; \
+			exit 1; \
+		}; \
+		mkdir -p $(VENV_SRC); \
+	fi
+	@echo "Upgrading pip and installing dependencies..."
+	@$(VENV_BIN)/pip install --upgrade pip
+	cp $(KV_CACHE_MGR_PATH)/requirements.txt $(VENV_SRC)/
+	cp $(KV_CACHE_MGR_PATH)/render_jinja_template_wrapper.py $(VENV_SRC)/
+	chmod u+w $(VENV_SRC)/*
+	@$(VENV_BIN)/pip install -r $(VENV_SRC)/requirements.txt
+	@echo "Verifying transformers installation..."
+	@$(VENV_BIN)/python -c "import transformers; print('✅ Transformers version ' + transformers.__version__ + ' installed.')" || { \
+		echo "ERROR: transformers library not properly installed in venv."; \
+		exit 1; \
+	}
+
 ##@ ZMQ Setup

-.PHONY: install-dependencies
-install-dependencies: download-tokenizer ## Install development dependencies based on OS/ARCH
-	@echo "Checking and installing development dependencies..."
-	@if [ "$(TARGETOS)" = "linux" ]; then \
-		if [ -x "$$(command -v apt)" ]; then \
-			if ! dpkg -s libzmq3-dev >/dev/null 2>&1 || ! dpkg -s g++ >/dev/null 2>&1; then \
-				echo "Installing dependencies with apt..."; \
-				sudo apt-get update && sudo apt-get install -y libzmq3-dev g++; \
-			else \
-				echo "✅ ZMQ and g++ are already installed."; \
-			fi; \
-		elif [ -x "$$(command -v dnf)" ]; then \
-			if ! dnf -q list installed zeromq-devel >/dev/null 2>&1 || ! dnf -q list installed gcc-c++ >/dev/null 2>&1; then \
-				echo "Installing dependencies with dnf..."; \
-				sudo dnf install -y zeromq-devel gcc-c++; \
+.PHONY: download-zmq
+download-zmq: ## Install ZMQ dependencies based on OS/ARCH
+	@echo "Checking if ZMQ is already installed..."
+	@if pkg-config --exists libzmq; then \
+		echo "✅ ZMQ is already installed."; \
+	else \
+		echo "Installing ZMQ dependencies..."; \
+		if [ "$(TARGETOS)" = "linux" ]; then \
+			if [ -x "$$(command -v apt)" ]; then \
+				apt update && apt install -y libzmq3-dev; \
+			elif [ -x "$$(command -v dnf)" ]; then \
+				dnf install -y zeromq-devel; \
 			else \
-				echo "✅ ZMQ and gcc-c++ are already installed."; \
+				echo "Unsupported Linux package manager. Install libzmq manually."; \
+				exit 1; \
 			fi; \
-		else \
-			echo "Unsupported Linux package manager. Install libzmq and g++/gcc-c++ manually."; \
-			exit 1; \
-		fi; \
-	elif [ "$(TARGETOS)" = "darwin" ]; then \
-		if [ -x "$$(command -v brew)" ]; then \
-			if ! brew list zeromq pkg-config >/dev/null 2>&1; then \
-				echo "Installing dependencies with brew..."; \
-				brew install zeromq pkg-config; \
+		elif [ "$(TARGETOS)" = "darwin" ]; then \
+			if [ -x "$$(command -v brew)" ]; then \
+				brew install zeromq; \
 			else \
-				echo "✅ ZeroMQ and pkgconf are already installed."; \
+				echo "Homebrew is not installed and is required to install zeromq. Install it from https://brew.sh/"; \
+				exit 1; \
 			fi; \
 		else \
-			echo "Homebrew is not installed and is required to install zeromq. Install it from https://brew.sh/"; \
+			echo "Unsupported OS: $(TARGETOS). Install libzmq manually - check https://zeromq.org/download/ for guidance."; \
 			exit 1; \
 		fi; \
-	else \
-		echo "Unsupported OS: $(TARGETOS). Install development dependencies manually."; \
-		exit 1; \
+		echo "✅ ZMQ dependencies installed."; \
 	fi
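Design note: the old install-dependencies target is effectively split up: the tokenizer download and Python setup become explicit prerequisites of build and test, while download-zmq only installs libzmq (and skips the install entirely when pkg-config --exists libzmq already succeeds). It no longer installs g++/gcc-c++ and drops the sudo prefixes, which suggests it is aimed at root or container environments; note it is also commented out in the test prerequisites above.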

pkg/kv-cache/kv_cache.go

Lines changed: 3 additions & 1 deletion
@@ -35,7 +35,7 @@ type KVCacheHelper struct {
 	blockSize int
 }

-func NewKVCacheHelper(ctx context.Context, config *common.Configuration, logger logr.Logger, usageChan chan float64,
+func NewKVCacheHelper(config *common.Configuration, logger logr.Logger, usageChan chan float64,
 	tokenizer tokenization.Tokenizer) (*KVCacheHelper, error) {
 	tokenProcConfig := kvblock.DefaultTokenProcessorConfig()
 	tokenProcConfig.BlockSize = config.TokenBlockSize
@@ -71,6 +71,8 @@ func (h *KVCacheHelper) Activate() {
 	h.blockCache.activate()
 }

+// OnRequestStart called when request received, simulates KV-cache block management
+// Returns number of tokens found in the cache.
 func (h *KVCacheHelper) OnRequestStart(prompt, modelName, requestID string) (int, error) {
 	h.logger.V(logging.TRACE).Info("KV cache - process request")
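For code outside this diff, the updated KVCacheHelper API looks roughly like the sketch below; cfg, logger, usageChan and tok are placeholders for values the caller already holds (they are not defined here), and the call mirrors the new signature with the context.Context parameter removed.

	// Construct the helper without a context, per the new signature.
	helper, err := kvcache.NewKVCacheHelper(cfg, logger, usageChan, tok)
	if err != nil {
		return err
	}
	helper.Activate()
	// OnRequestStart reports how many of the prompt's tokens were already cached.
	numOfCachedTokens, err := helper.OnRequestStart(prompt, modelName, requestID)
	if err != nil {
		return err
	}
	// ... use numOfCachedTokens to advance the simulated cache state.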

pkg/llm-d-inference-sim/simulator.go

Lines changed: 35 additions & 1 deletion
@@ -380,7 +380,7 @@ func (s *VllmSimulator) initializeSim(ctx context.Context) error {
 	}
 	s.logger.V(logging.DEBUG).Info("Chat template loaded", "template", s.chatTemplate, "params", s.chatTemplateKWArgs)

-	s.kvcacheHelper, err = kvcache.NewKVCacheHelper(ctx, s.config, s.logger, s.metrics.kvCacheUsageChan, s.tokenizer)
+	s.kvcacheHelper, err = kvcache.NewKVCacheHelper(s.config, s.logger, s.metrics.kvCacheUsageChan, s.tokenizer)
 	if err != nil {
 		return err
 	}
@@ -790,3 +790,37 @@ func (s *VllmSimulator) dequeue() *openaiserverapi.CompletionReqCtx {

 	return nil
 }
+
+func (s *VllmSimulator) getPromptForKVCache(reqCtx *openaiserverapi.CompletionReqCtx) (string, error) {
+	if reqCtx.IsChatCompletion {
+		renderReq := preprocessing.RenderJinjaTemplateRequest{
+			Conversations:             make([]preprocessing.ChatMessage, 0),
+			Tools:                     make([]interface{}, 0),
+			Documents:                 make([]interface{}, 0),
+			ReturnAssistantTokensMask: false,
+			ContinueFinalMessage:      false,
+			AddGenerationPrompt:       false,
+			ChatTemplate:              s.chatTemplate,
+			ChatTemplateKWArgs:        s.chatTemplateKWArgs,
+		}
+		// Convert messages to the format expected by the renderer
+		for _, msg := range reqCtx.CompletionReq.GetMessages() {
+			renderReq.Conversations = append(renderReq.Conversations, preprocessing.ChatMessage{
+				Role:    msg.Role,
+				Content: msg.Content.Raw,
+			})
+		}
+
+		// Don't use vllmReq.GetModel() - it may include LoRA's name.
+		// This call requires the base model name instead.
+		prompt, err := s.tokenizer.RenderChatTemplate(s.config.Model, &renderReq)
+		if err != nil {
+			s.logger.Error(err, "failed to render template")
+			return "", err
+		}
+		s.logger.Info("Convert prompt", "rendered", prompt)
+		return prompt, nil
+	}
+
+	return reqCtx.CompletionReq.GetPrompt(), nil
+}
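Moving this logic into simulator.go keeps the chat-template rendering (and the preprocessing import it needs) next to the code that loads the template, matching the commit message; worker.go below shrinks to a single getPromptForKVCache call plus error handling. For non-chat completions the helper simply returns the raw prompt.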

pkg/llm-d-inference-sim/worker.go

Lines changed: 7 additions & 34 deletions
@@ -26,7 +26,6 @@ import (
 	"github.com/llm-d/llm-d-inference-sim/pkg/common"
 	"github.com/llm-d/llm-d-inference-sim/pkg/common/logging"
 	openaiserverapi "github.com/llm-d/llm-d-inference-sim/pkg/openai-server-api"
-	preprocessing "github.com/llm-d/llm-d-kv-cache-manager/pkg/preprocessing/chat_completions"
 	"github.com/valyala/fasthttp"
 )
@@ -91,40 +90,14 @@ func (s *VllmSimulator) processRequestAsync(reqCtx *openaiserverapi.CompletionRe
 	}

 	if s.config.EnableKVCache {
-		var prompt string
+		prompt, err := s.getPromptForKVCache(reqCtx)

-		if reqCtx.IsChatCompletion {
-			renderReq := preprocessing.RenderJinjaTemplateRequest{
-				Conversations:             make([]preprocessing.ChatMessage, 0),
-				Tools:                     make([]interface{}, 0),
-				Documents:                 make([]interface{}, 0),
-				ReturnAssistantTokensMask: false,
-				ContinueFinalMessage:      false,
-				AddGenerationPrompt:       false,
-				ChatTemplate:              s.chatTemplate,
-				ChatTemplateKWArgs:        s.chatTemplateKWArgs,
-			}
-			// Convert messages to the format expected by the renderer
-			for _, msg := range req.GetMessages() {
-				renderReq.Conversations = append(renderReq.Conversations, preprocessing.ChatMessage{
-					Role:    msg.Role,
-					Content: msg.Content.Raw,
-				})
-			}
-
-			var err error
-			// Don't use vllmReq.GetModel() - it may include LoRA's name.
-			// This call requires the base model name instead.
-			prompt, err = s.tokenizer.RenderChatTemplate(s.config.Model, &renderReq)
-			if err != nil {
-				s.logger.Error(err, "failed to render template")
-				s.sendCompletionError(reqCtx.HTTPReqCtx,
-					openaiserverapi.NewCompletionError(err.Error(), fasthttp.StatusInternalServerError, nil),
-					false)
-				return
-			}
-		} else {
-			prompt = req.GetPrompt()
+		if err != nil {
+			s.logger.Error(err, "failed to render template")
+			s.sendCompletionError(reqCtx.HTTPReqCtx,
+				openaiserverapi.NewCompletionError(err.Error(), fasthttp.StatusInternalServerError, nil),
+				false)
+			return
 		}

 		if numOfCachedTokens, err := s.kvcacheHelper.OnRequestStart(prompt, req.GetModel(), req.GetRequestID()); err != nil {
