-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathMakefile
More file actions
101 lines (74 loc) · 3.03 KB
/
Makefile
File metadata and controls
101 lines (74 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# The targets listed here are commands, not files. Declaring them phony keeps
# make from treating a same-named file or directory (e.g. `build`, `test`) as
# an up-to-date target and silently skipping the recipe.
.PHONY: develop build build-pypi \
        test test-rust test-python test-wasm \
        bench bench-rust bench-python bench-compare \
        lint lint-fix format format-fix \
        wasm wasm-nodejs npm-publish lang-packages npm-publish-languages \
        ml-prepare ml-prepare-full ml-prepare-max ml-train ml-test \
        ml-quantize ml-package
# Run `maturin develop` from python/, then copy the first `_native*.so` found
# under .venv into _native/. The copy step is skipped when nothing is found
# (including when .venv does not exist; find's errors are discarded).
develop:
	cd python && maturin develop
	@native_lib=$$(find .venv -name "_native*.so" 2>/dev/null | head -n 1); \
	[ -z "$$native_lib" ] || { mkdir -p _native && cp "$$native_lib" _native/; }
# Run `maturin build` from inside the python/ directory.
build:
	cd python && maturin build
# Build for PyPI (manylinux wheels, requires Docker)
# Mounts the directory make is running in at /io inside the maturin container
# and writes release wheels to dist/.
# $(CURDIR) is set by make itself, so it stays correct under `make -C <dir>`;
# $(PWD) is only inherited from the environment and may be stale or unset.
# The path is quoted so a checkout path containing spaces still mounts.
build-pypi:
	docker run --rm -v "$(CURDIR)":/io -w /io ghcr.io/pyo3/maturin build --release -o dist
# Aggregate target: runs the Rust, Python and WebAssembly test targets.
test: test-rust \
      test-python \
      test-wasm
# Run `cargo test` restricted to the badwords-core package.
test-rust:
	cargo test --package badwords-core
# Run pytest verbosely on tests/. Uses .venv/bin/python when a .venv
# directory exists, otherwise the python3 found on PATH.
test-python:
	@py=python3; \
	if [ -d .venv ]; then py=.venv/bin/python; fi; \
	"$$py" -m pytest tests/ -v
# Run `wasm-pack test --node` from inside rust/badwords-wasm.
test-wasm:
	cd rust/badwords-wasm && wasm-pack test --node
# Aggregate target: runs the Rust and Python benchmark targets.
bench: bench-rust \
       bench-python
# Print a banner, then run scripts/bench_compare.py. Uses .venv/bin/python
# when a .venv directory exists, otherwise the python3 found on PATH.
bench-compare:
	@echo "BadWords vs glin-profanity (requires: pip install glin-profanity)"
	@py=python3; \
	[ ! -d .venv ] || py=.venv/bin/python; \
	"$$py" scripts/bench_compare.py
# Run `cargo bench` restricted to the badwords-core package.
bench-rust:
	cargo bench --package badwords-core
# Run tests/bench_filter.py through pytest with --benchmark-only. Uses
# .venv/bin/python when a .venv directory exists, otherwise python3 on PATH.
bench-python:
	@py=python3; \
	if [ -d .venv ]; then py=.venv/bin/python; fi; \
	"$$py" -m pytest tests/bench_filter.py -v --benchmark-only
# Ruff: lint (check only)
# Uses .venv/bin/ruff when a .venv directory exists, otherwise ruff on PATH.
lint:
	@ruff_bin=ruff; \
	if [ -d .venv ]; then ruff_bin=.venv/bin/ruff; fi; \
	"$$ruff_bin" check .
# Ruff: format check (CI)
# Runs with --check, so files are reported rather than rewritten.
# Uses .venv/bin/ruff when a .venv directory exists, otherwise ruff on PATH.
format:
	@ruff_bin=ruff; \
	if [ -d .venv ]; then ruff_bin=.venv/bin/ruff; fi; \
	"$$ruff_bin" format --check .
# Ruff: format fix (apply formatting)
# Uses .venv/bin/ruff when a .venv directory exists, otherwise ruff on PATH.
format-fix:
	@ruff_bin=ruff; \
	if [ -d .venv ]; then ruff_bin=.venv/bin/ruff; fi; \
	"$$ruff_bin" format .
# Ruff: lint with auto-fix
# Uses .venv/bin/ruff when a .venv directory exists, otherwise ruff on PATH.
lint-fix:
	@ruff_bin=ruff; \
	if [ -d .venv ]; then ruff_bin=.venv/bin/ruff; fi; \
	"$$ruff_bin" check . --fix
# WebAssembly build for browser
# Runs wasm-pack from rust/badwords-wasm with the `web` target and `pkg` as
# the output directory.
wasm:
	cd rust/badwords-wasm && wasm-pack build --target web --out-dir pkg
# WebAssembly build for Node.js
# Runs wasm-pack from rust/badwords-wasm with the `nodejs` target. It uses the
# same `pkg` output directory as the `wasm` target.
wasm-nodejs:
	cd rust/badwords-wasm && wasm-pack build --target nodejs --out-dir pkg
# Run `npm publish` from rust/badwords-wasm/pkg.
# NOTE(review): this target has no prerequisites, so pkg/ is not rebuilt
# first — presumably `make wasm` or `make wasm-nodejs` must be run beforehand;
# confirm.
npm-publish:
	cd rust/badwords-wasm/pkg && npm publish
# Run scripts/generate-lang-packages.py with the python3 found on PATH
# (unlike the test/bench targets, .venv is not consulted here).
lang-packages:
	python3 scripts/generate-lang-packages.py
# Run `npm publish --access public` from js/languages.
npm-publish-languages:
	cd js/languages && npm publish --access public
# ML training (requires: pip install -r ml/requirements.txt)
# Run ml/prepare_data.py with the `multilingual` preset (executed from ml/).
ml-prepare:
	cd ml && python prepare_data.py --preset multilingual
# Full dataset (~600k samples, ~8-10h training with xlm-roberta)
# Same preset as ml-prepare, with --max-total 600000 passed explicitly.
ml-prepare-full:
	cd ml && python prepare_data.py --preset multilingual --max-total 600000
# Max dataset (no cap, ~1M+ samples, ~15-20h)
# NOTE(review): this command is identical to the ml-prepare recipe.
# Presumably "no cap" relies on prepare_data.py's behavior when --max-total
# is omitted — confirm that the two targets are meant to be the same.
ml-prepare-max:
	cd ml && python prepare_data.py --preset multilingual
# Run ml/train.py (executed from inside ml/).
ml-train:
	cd ml && python train.py
# Run ml/test_inference.py (executed from inside ml/).
ml-test:
	cd ml && python test_inference.py
# Quantize model: 500MB -> ~135MB
# Runs ml/quantize_model.py (executed from inside ml/).
ml-quantize:
	cd ml && python quantize_model.py
# Package ML model for GitHub Release (upload as badwords-ml-model.zip)
# Quantizes model first (~4x smaller) through the ml-quantize prerequisite.
#
# Steps:
#   1. Fail with a hint (on stderr) if ml/models/model.onnx is missing.
#   2. Delete any previous badwords-ml-model.zip. `zip -r` on an existing
#      archive updates it in place, so entries for files that no longer exist
#      would otherwise survive into the new release artifact.
#   3. Zip the contents of ml/models, excluding everything under checkpoints/.
#
# NOTE(review): the model.onnx check runs after the ml-quantize prerequisite.
# If quantize_model.py itself needs a trained model, the "Run ml-train first"
# hint may never be reached — confirm the intended order.
ml-package: ml-quantize
	@if [ ! -f ml/models/model.onnx ]; then echo "Run ml-train first" >&2; exit 1; fi
	rm -f badwords-ml-model.zip
	(cd ml/models && zip -r ../../badwords-ml-model.zip . -x "checkpoints/*" -x "checkpoints/*/*")
	@echo "Created badwords-ml-model.zip — upload to GitHub Release"