# BadWords/Makefile at master · FlacSy/BadWords · GitHub

# Every target in this file names a command, not a file it produces. Declaring
# them all phony guarantees the recipe runs even if a file or directory with
# the same name as a target ever appears in the working tree.
.PHONY: develop build build-pypi \
        test test-rust test-python test-wasm \
        bench bench-rust bench-python bench-compare \
        lint lint-fix format format-fix \
        wasm wasm-nodejs npm-publish lang-packages npm-publish-languages \
        ml-prepare ml-prepare-full ml-prepare-max ml-train ml-test \
        ml-quantize ml-package

# Run `maturin develop` inside python/, then look under .venv for a file named
# `_native*.so`; if one is found, copy the first match into ./_native/
# (creating that directory when needed). Nothing is copied when there is no
# match or .venv does not exist (find errors are silenced).
develop:
	(cd python && maturin develop)
	@native_lib=$$(find .venv -name "_native*.so" 2>/dev/null | head -n 1); \
	[ -z "$$native_lib" ] || { mkdir -p _native && cp "$$native_lib" _native/; }

# Run `maturin build` from the python/ directory.
build:
	(cd python && maturin build)

# Build for PyPI (manylinux wheels, requires Docker)
# Mounts the directory make is running in at /io inside the maturin container
# and writes the release build output to dist/.
# $(CURDIR) is set by make itself (and honours `make -C`), unlike $(PWD), which
# is only whatever the calling environment happened to export. The quotes keep
# the bind mount intact when the checkout path contains spaces.
build-pypi:
	docker run --rm -v "$(CURDIR)":/io -w /io ghcr.io/pyo3/maturin build --release -o dist

# Aggregate target: runs the Rust, Python and WebAssembly test targets.
test: \
  test-rust \
  test-python \
  test-wasm

# Run the cargo tests of the badwords-core package only.
test-rust:
	cargo test --package badwords-core

# Run pytest verbosely over tests/. The interpreter is .venv/bin/python when a
# .venv directory exists, otherwise python3 from PATH.
test-python:
	@py=python3; \
	if [ -d .venv ]; then py=.venv/bin/python; fi; \
	"$$py" -m pytest tests/ -v

# Run `wasm-pack test --node` from the rust/badwords-wasm crate directory.
test-wasm:
	(cd rust/badwords-wasm && wasm-pack test --node)

# Aggregate target: runs the Rust and Python benchmark targets
# (bench-compare is not included and must be invoked explicitly).
bench: \
  bench-rust \
  bench-python

# Print a banner, then run scripts/bench_compare.py with .venv/bin/python when
# a .venv directory exists, otherwise with python3 from PATH.
bench-compare:
	@echo "BadWords vs glin-profanity (requires: pip install glin-profanity)"
	@py=python3; \
	if [ -d .venv ]; then py=.venv/bin/python; fi; \
	"$$py" scripts/bench_compare.py

# Run the cargo benchmarks of the badwords-core package only.
bench-rust:
	cargo bench --package badwords-core

# Run pytest on tests/bench_filter.py with `-v --benchmark-only`. The
# interpreter is .venv/bin/python when a .venv directory exists, otherwise
# python3 from PATH.
bench-python:
	@py=python3; \
	if [ -d .venv ]; then py=.venv/bin/python; fi; \
	"$$py" -m pytest tests/bench_filter.py -v --benchmark-only

# Ruff: lint (check only)
# Uses .venv/bin/ruff when a .venv directory exists, otherwise ruff from PATH.
lint:
	@ruff_bin=ruff; \
	if [ -d .venv ]; then ruff_bin=.venv/bin/ruff; fi; \
	"$$ruff_bin" check .

# Ruff: format check (CI)
# Runs `ruff format --check` (no `format-fix`-style rewrite is requested).
# Uses .venv/bin/ruff when a .venv directory exists, otherwise ruff from PATH.
format:
	@ruff_bin=ruff; \
	if [ -d .venv ]; then ruff_bin=.venv/bin/ruff; fi; \
	"$$ruff_bin" format --check .

# Ruff: format fix (apply formatting)
# Uses .venv/bin/ruff when a .venv directory exists, otherwise ruff from PATH.
format-fix:
	@ruff_bin=ruff; \
	if [ -d .venv ]; then ruff_bin=.venv/bin/ruff; fi; \
	"$$ruff_bin" format .

# Ruff: lint with auto-fix
# Uses .venv/bin/ruff when a .venv directory exists, otherwise ruff from PATH.
lint-fix:
	@ruff_bin=ruff; \
	if [ -d .venv ]; then ruff_bin=.venv/bin/ruff; fi; \
	"$$ruff_bin" check . --fix

# WebAssembly build for browser
# Builds with `--target web` into rust/badwords-wasm/pkg. The `wasm-nodejs`
# target writes to the same pkg directory.
wasm:
	(cd rust/badwords-wasm && wasm-pack build --target web --out-dir pkg)

# WebAssembly build for Node.js
# Builds with `--target nodejs` into rust/badwords-wasm/pkg. The `wasm` target
# writes to the same pkg directory.
wasm-nodejs:
	(cd rust/badwords-wasm && wasm-pack build --target nodejs --out-dir pkg)

# Publish whatever currently sits in rust/badwords-wasm/pkg to npm.
# NOTE(review): this target has no prerequisite, so pkg/ presumably has to be
# produced beforehand by `wasm` or `wasm-nodejs` — confirm the intended flow.
npm-publish:
	(cd rust/badwords-wasm/pkg && npm publish)

# Run scripts/generate-lang-packages.py with python3 from PATH.
# NOTE(review): unlike test-python / bench-compare, this target does not prefer
# .venv/bin/python when .venv exists — confirm that is intentional.
lang-packages:
	python3 scripts/generate-lang-packages.py

# Publish the package in js/languages to npm with public access.
npm-publish-languages:
	(cd js/languages && npm publish --access public)

# ML training (requires: pip install -r ml/requirements.txt)
# Runs ml/prepare_data.py with the `multilingual` preset, using whichever
# `python` is first on PATH (no .venv detection here).
ml-prepare:
	(cd ml && python prepare_data.py --preset multilingual)

# Full dataset (~600k samples, ~8-10h training with xlm-roberta)
# Same as ml-prepare, with `--max-total 600000` added.
ml-prepare-full:
	(cd ml && python prepare_data.py --preset multilingual --max-total 600000)

# Max dataset (no cap, ~1M+ samples, ~15-20h)
# NOTE(review): this command is identical to the one in `ml-prepare`; "no cap"
# therefore holds only if prepare_data.py applies no limit when --max-total is
# omitted — confirm against the script, or pass the uncapped option explicitly.
ml-prepare-max:
	(cd ml && python prepare_data.py --preset multilingual)

# Run ml/train.py from inside the ml/ directory.
ml-train:
	(cd ml && python train.py)

# Run ml/test_inference.py from inside the ml/ directory.
ml-test:
	(cd ml && python test_inference.py)

# Quantize model: 500MB -> ~135MB
# Runs ml/quantize_model.py from inside the ml/ directory.
ml-quantize:
	(cd ml && python quantize_model.py)

# Package ML model for GitHub Release (upload as badwords-ml-model.zip)
# Quantizes model first (~4x smaller) through the ml-quantize prerequisite,
# then zips the contents of ml/models (excluding checkpoints/) into
# badwords-ml-model.zip at the repository root.
# Fails with "Run ml-train first" when ml/models/model.onnx is missing.
ml-package: ml-quantize
	@if [ ! -f ml/models/model.onnx ]; then echo "Run ml-train first"; exit 1; fi
# `zip -r` updates an existing archive in place and never drops entries whose
# source files are gone, so remove any previous archive to avoid shipping
# stale files in the release zip.
	rm -f badwords-ml-model.zip
	(cd ml/models && zip -r ../../badwords-ml-model.zip . -x "checkpoints/*" -x "checkpoints/*/*")
	@echo "Created badwords-ml-model.zip — upload to GitHub Release"