-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
101 lines (84 loc) · 3.08 KB
/
Makefile
File metadata and controls
101 lines (84 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# ---------------------------------------------------------------------------
# Declare every command-style target as phony so a stray file with the same
# name can never shadow it and make it appear "up to date". The previous list
# omitted run, run-k8s, docker-build-multiarch, deploy-llmkube, deploy-job,
# get-results, logs, clean-k8s, port-forward and status.
.PHONY: build run run-k8s test docker-build docker-push docker-build-multiarch \
        deploy deploy-llmkube deploy-job get-results logs clean clean-k8s \
        port-forward status

# --- Variables (overridable from the command line, e.g. `make IMAGE_TAG=v1`) -
IMAGE_NAME ?= issueparser
IMAGE_TAG  ?= latest
# MicroK8s' built-in registry listens on localhost:32000
REGISTRY   ?= localhost:32000
# --- Local development -------------------------------------------------------

# Compile the CLI into bin/.
build:
	go build -o bin/issueparser ./cmd/issueparser

# Build, then analyze a default set of repos against a local LLM endpoint.
run: build
	./bin/issueparser \
		--repos="ollama/ollama,vllm-project/vllm" \
		--keywords="multi-gpu,scale,concurrency,production" \
		--max-issues=50 \
		--llm-endpoint="http://localhost:8080" \
		--output="issue-analysis-report.md"
# --- Run against LLMKube (requires an active port-forward; see port-forward) -
run-k8s: build
	@echo "Make sure you have port-forwarded: kubectl port-forward svc/qwen-14b-issueparser-service 8080:8080"
	./bin/issueparser \
		--repos="ollama/ollama,vllm-project/vllm" \
		--keywords="multi-gpu,scale,concurrency,production,performance,memory,VRAM" \
		--max-issues=100 \
		--llm-endpoint="http://localhost:8080" \
		--output="issue-analysis-report.md" \
		--verbose

# Run the Go test suite for all packages.
test:
	go test ./...
# --- Docker ------------------------------------------------------------------

# Build an AMD64 image (Linux servers) and load it into the local daemon.
docker-build:
	docker buildx build --platform linux/amd64 -t $(IMAGE_NAME):$(IMAGE_TAG) --load .

# Retag the freshly built image for the MicroK8s registry and push it.
docker-push: docker-build
	docker tag $(IMAGE_NAME):$(IMAGE_TAG) $(REGISTRY)/$(IMAGE_NAME):$(IMAGE_TAG)
	docker push $(REGISTRY)/$(IMAGE_NAME):$(IMAGE_TAG)

# Multi-arch build (amd64 + arm64), pushed straight to the registry; buildx
# cannot --load a multi-platform image, hence --push here.
docker-build-multiarch:
	docker buildx build --platform linux/amd64,linux/arm64 -t $(REGISTRY)/$(IMAGE_NAME):$(IMAGE_TAG) --push .
# --- Kubernetes deployment ---------------------------------------------------

# Apply the LLMKube model + inference service, then wait for them to come up.
# Both waits are deliberately best-effort (`|| true`): a slow rollout or a
# timeout should not abort the make run — deploy-job still gets applied.
deploy-llmkube:
	kubectl apply -f deploy/llmkube-qwen-14b.yaml
	@echo "Waiting for model to be ready..."
	kubectl wait --for=condition=Ready model/qwen-14b-issueparser --timeout=600s || true
	@echo "Waiting for inference service..."
	kubectl wait --for=condition=Available deployment -l app=issueparser --timeout=300s || true

# Apply the analysis Job manifest.
deploy-job:
	kubectl apply -f deploy/job.yaml

# Full deployment: model/service first, then the job that consumes them.
deploy: deploy-llmkube deploy-job
# --- Results & logs ----------------------------------------------------------

# Copy the generated report out of the job's pod into the working directory.
# NOTE(review): the `default/` namespace is hard-coded — confirm the job
# actually runs in `default`.
get-results:
	@POD=$$(kubectl get pods -l job-name=issueparser-analysis -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) && \
	if [ -n "$$POD" ]; then \
		kubectl cp default/$$POD:/output/issue-analysis-report.md ./issue-analysis-report.md && \
		echo "Report saved to issue-analysis-report.md"; \
	else \
		echo "No job pod found"; \
	fi

# Follow the job's log output.
logs:
	kubectl logs -f job/issueparser-analysis
# --- Cleanup -----------------------------------------------------------------

# Remove local build artifacts and the generated report ($(RM) = rm -f).
clean:
	$(RM) -r bin/
	$(RM) issue-analysis-report.md

# Tear down every cluster-side resource this Makefile created.
clean-k8s:
	kubectl delete -f deploy/job.yaml --ignore-not-found
	kubectl delete -f deploy/llmkube-qwen-14b.yaml --ignore-not-found
	kubectl delete pvc issueparser-output --ignore-not-found

# Expose the in-cluster inference service on localhost:8080 for local testing.
port-forward:
	kubectl port-forward svc/qwen-14b-issueparser-service 8080:8080
# --- Status check ------------------------------------------------------------

# Report the state of every deployed component. Each probe degrades gracefully
# (`2>/dev/null || echo ...`) when the resource is absent; the final pods probe
# previously lacked this fallback, so a missing cluster/CRD errored the target.
status:
	@echo "=== LLMKube Model ==="
	kubectl get model qwen-14b-issueparser -o wide 2>/dev/null || echo "Not deployed"
	@echo ""
	@echo "=== LLMKube InferenceService ==="
	kubectl get inferenceservice qwen-14b-issueparser-service -o wide 2>/dev/null || echo "Not deployed"
	@echo ""
	@echo "=== IssueParser Job ==="
	kubectl get job issueparser-analysis 2>/dev/null || echo "Not running"
	@echo ""
	@echo "=== Pods ==="
	kubectl get pods -l app=issueparser 2>/dev/null || echo "No pods found"