diff --git a/.gitignore b/.gitignore index 1d88472..8511344 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ __pycache__ .idea +.venv .vscode allocator bin/ @@ -7,4 +8,3 @@ coverage*out test/allocator/allocator test/infra/allocator/config.json test/integration/integration-test -venv \ No newline at end of file diff --git a/Makefile b/Makefile index 9ce48b9..35bf954 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,15 @@ INTEGRATION_COVERAGE_FILE := coverage.integration.out OVERALL_COVERAGE_FILE := coverage.overall.out COVERAGE_SUMMARY_FILE := coverage.out INTEGRATION_TEST_BIN := ./test/integration/integration-test +ALLOCATOR_TEST_BIN := ./test/allocator/allocator + +PYTHON_BIN ?= python3.14 +PYTHON_REQUIREMENTS_FILE := requirements.txt +PYTHON_VENV_DIR ?= .venv +PYTHON_VENV_BIN := $(PYTHON_VENV_DIR)/bin +PYTHON_VENV_PYTHON := $(PYTHON_VENV_BIN)/python +PYTHON_ANALYZE_DIR := ./test/allocator/analyze +PYTHON_ANALYZE_SCRIPT := $(PYTHON_ANALYZE_DIR)/compare.py .PHONY: help help: @@ -30,22 +39,50 @@ install-lint: @mkdir -p "$(LOCAL_BIN)" @sh -s -- "$(LOCAL_BIN)" "$(GOLANGCI_LINT_VERSION)" "$(GOLANGCI_LINT_INSTALL_METHOD)" < scripts/install_golangci_lint.sh +.PHONY: python-venv +python-venv: +## Create venv with PYTHON_BIN and update pip. + $(PYTHON_BIN) -m venv $(PYTHON_VENV_DIR) + $(PYTHON_VENV_PYTHON) -m pip install --upgrade pip + +.PHONY: python-deps +python-deps: python-venv +## Install Python dependencies from requirements file. + $(PYTHON_VENV_PYTHON) -m pip install -r $(PYTHON_REQUIREMENTS_FILE) + +.PHONY: python-check +python-check: python-deps +## Validate Python dependency graph and script syntax. + $(PYTHON_VENV_PYTHON) -m pip check + $(PYTHON_VENV_PYTHON) -m py_compile $(PYTHON_ANALYZE_DIR)/*.py + +.PHONY: docker-check +docker-check: +## Ensure Docker daemon is reachable. + @docker info >/dev/null 2>&1 || { \ + echo "error: Docker is required. 
Install Docker and make sure the daemon is running, then retry."; \ + exit 1; \ + } + .PHONY: generate generate: ## Regenerate protobuf stubs for allocator schema. ## Requires protoc to be installed: https://grpc.io/docs/protoc-installation/ @command -v protoc >/dev/null || { \ - echo "error: protoc is required; install it first: https://grpc.io/docs/protoc-installation/"; \ + echo "error: protoc is required. Install it first: https://grpc.io/docs/protoc-installation/"; \ exit 1; \ } cd test/allocator/schema && ./generate.sh +.PHONY: allocator-build +allocator-build: +## Build allocator binary. + go build -o $(ALLOCATOR_TEST_BIN) ./test/allocator + .PHONY: build -build: +build: allocator-build ## Build all Go packages. -## Also build allocator demo binary. go build ./... - go build ./test/allocator .PHONY: unit-test unit-test: @@ -69,24 +106,32 @@ test: unit-test integration-test "$(OVERALL_COVERAGE_FILE)" \ "$(COVERAGE_SUMMARY_FILE)" +.PHONY: lint-prepare +lint-prepare: install-lint +## Verify linter configuration. + $(GOLANGCI_LINT) config verify + .PHONY: lint -lint: install-lint +lint: lint-prepare ## Analyze code locally. ## Use project-local golangci-lint from ./bin. -## Verify linter config before checks. - $(GOLANGCI_LINT) config verify $(GOLANGCI_LINT) run ./... .PHONY: fix -fix: install-lint +fix: lint-prepare ## Apply automatic source fixes. ## Run go mod tidy, and golangci-lint --fix. go mod tidy - $(GOLANGCI_LINT) config verify $(GOLANGCI_LINT) run --fix ./... +.PHONY: allocator-analyze +allocator-analyze: allocator-build docker-check python-check +## Run allocator docker benchmark and render plots. +## Requires Docker daemon and test allocator binary. + $(PYTHON_VENV_PYTHON) $(PYTHON_ANALYZE_SCRIPT) + .PHONY: clean clean: -## Remove generated test and coverage artifacts. -## Keep workspace clean between test runs. 
- rm -f $(UNIT_COVERAGE_FILE) $(INTEGRATION_COVERAGE_FILE) $(OVERALL_COVERAGE_FILE) $(COVERAGE_SUMMARY_FILE) $(INTEGRATION_TEST_BIN) +## Remove generated build, test, and Python cache artifacts. + rm -f $(UNIT_COVERAGE_FILE) $(INTEGRATION_COVERAGE_FILE) $(OVERALL_COVERAGE_FILE) $(COVERAGE_SUMMARY_FILE) $(INTEGRATION_TEST_BIN) $(ALLOCATOR_TEST_BIN) + rm -rf "$(PYTHON_ANALYZE_DIR)/__pycache__" diff --git a/README.md b/README.md index 91cfece..3d7b47b 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,8 @@ The MemLimiter comprises two main parts: ## Quick start guide +For command workflows and expected outputs, see [`make-workflows.md`](make-workflows.md). + ### Services without `Cgo` Refer to the [example service](test/allocator/server/server.go). diff --git a/make-workflows.md b/make-workflows.md new file mode 100644 index 0000000..3878d87 --- /dev/null +++ b/make-workflows.md @@ -0,0 +1,136 @@ +# Make Workflows (Linux / WSL) + +This file describes how to use project `Makefile` targets and what output to expect. + +## Prerequisites + +- Go toolchain available in `PATH`. +- Python `3.14` available as `python3.14` (or override with `PYTHON_BIN`). +- Docker daemon running for allocator analysis. +- `protoc` installed only if you run `make generate`. + +## Quick Start + +Run these once on a fresh checkout: + +```bash +make install-lint +make python-check +``` + +Expected results: + +- `bin/golangci-lint` is installed. +- `.venv` is created and dependencies are installed from `requirements.txt`. +- `pip check` prints `No broken requirements found.` + +## Daily Development Flow + +### 1) Build + +```bash +make build +``` + +What it does: + +- Builds allocator demo binary: `test/allocator/allocator`. +- Builds all Go packages in the repository. + +### 2) Lint + +```bash +make lint +``` + +What it does: + +- Installs pinned `golangci-lint` if needed. +- Verifies linter config. +- Runs `golangci-lint run ./...`. 
+ +### 3) Auto-fix + +```bash +make fix +``` + +What it does: + +- Runs `go mod tidy`. +- Runs `golangci-lint` with `--fix`. + +### 4) Tests + +```bash +make unit-test +make integration-test +make test +``` + +Expected artifacts: + +- `coverage.unit.out` +- `coverage.integration.out` +- `coverage.overall.out` +- `coverage.out` (human-readable summary) +- `test/integration/integration-test` (integration test binary) + +## Allocator Analysis Flow + +Run: + +```bash +make allocator-analyze +``` + +This target runs: + +1. `make allocator-build` +2. `make docker-check` +3. `make python-check` +4. Python benchmark/plot script: `test/allocator/analyze/compare.py` + +Expected console signals: + +- Lines like `>>> Start case: ...` +- Progress logs from allocator perf client. + +Expected output directory: + +- `/tmp/allocator/allocator_/` + +Expected generated files: + +- `control_params.png` +- `rss.png` +- Per-case directories with: + - `server_config.json` + - `perf_config.json` + - `tracker.csv` + +## Utility Targets + +- `make help` - print all available targets. +- `make python-venv` - create `.venv` and upgrade `pip`. +- `make python-deps` - install dependencies from `requirements.txt`. +- `make python-check` - run `pip check` and Python syntax compile. +- `make docker-check` - fail fast if Docker daemon is unavailable. +- `make allocator-build` - build `test/allocator/allocator` only. +- `make generate` - regenerate protobuf files for allocator schema. +- `make lint-prepare` - install lint tools and verify lint config. +- `make clean` - remove generated binaries, coverage files, and Python cache for analyzer scripts. 
+ +## Common Overrides + +Use a different Python interpreter: + +```bash +make python-check PYTHON_BIN=python3.13 +``` + +Use a different virtual environment directory: + +```bash +make python-check PYTHON_VENV_DIR=.venv-local +``` diff --git a/requirements.txt b/requirements.txt index 5a778aa..ebbc3ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,23 @@ -certifi==2022.5.18.1 -charset-normalizer==2.0.12 -cycler==0.11.0 -docker==5.0.3 -fonttools==4.33.3 -humanize==4.1.0 -idna==3.3 -Jinja2==3.1.2 -kiwisolver==1.4.3 -MarkupSafe==2.1.1 -matplotlib==3.5.2 -numpy==1.22.4 -packaging==21.3 -pandas==1.4.2 -Pillow==9.1.1 -pyparsing==3.0.9 -python-dateutil==2.8.2 -pytz==2022.1 -requests==2.28.0 -six==1.16.0 -urllib3==1.26.9 -websocket-client==1.3.2 +certifi==2026.2.25 +charset-normalizer==3.4.7 +contourpy==1.3.3 +cycler==0.12.1 +docker==7.1.0 +fonttools==4.62.1 +humanize==4.15.0 +idna==3.11 +Jinja2==3.1.6 +kiwisolver==1.5.0 +MarkupSafe==3.0.3 +matplotlib==3.10.8 +numpy==2.4.4 +packaging==26.1 +pandas==3.0.2 +Pillow==12.2.0 +pyparsing==3.3.2 +python-dateutil==2.9.0.post0 +pytz==2026.1.post1 +requests==2.33.1 +six==1.17.0 +urllib3==2.6.3 +websocket-client==1.9.0 diff --git a/test/allocator/analyze/compare.py b/test/allocator/analyze/compare.py index 70775b4..78db468 100755 --- a/test/allocator/analyze/compare.py +++ b/test/allocator/analyze/compare.py @@ -16,6 +16,9 @@ image_tag: Final = 'allocator' dockerfile_path: Final = 'test/allocator' container_name: Final = 'allocator' +# We retry exactly once after the first failure to handle a transient +# container startup race, while still failing fast on real errors. 
+EXEC_RUN_ATTEMPTS: Final = 2 class PerfConfigRenderer: @@ -63,7 +66,9 @@ class ServerConfigRenderer: {% endif %} "listen_endpoint": "0.0.0.0:1988", "tracker": { - "path": "/etc/allocator/tracker.csv", + "backend_file": { + "path": "/etc/allocator/tracker.csv" + }, "period": "10ms" } } @@ -101,7 +106,7 @@ def __build_image(self): def execute(self, mem_limit: str, session_dir_path: os.PathLike): try: - # drop container if exists + # Drop container if it exists. container = self.client.containers.get(container_name) container.remove(force=True) except docker.errors.NotFound: @@ -111,6 +116,7 @@ def execute(self, mem_limit: str, session_dir_path: os.PathLike): name=container_name, image=image_tag, mem_limit=mem_limit, + user=f"{os.getuid()}:{os.getgid()}", volumes={ str(session_dir_path): { 'bind': '/etc/allocator', @@ -120,15 +126,27 @@ def execute(self, mem_limit: str, session_dir_path: os.PathLike): detach=True, ) - _, logs = container.exec_run( - cmd='/usr/local/bin/allocator perf -c /etc/allocator/perf_config.json', - stream=True, - ) - - for log in logs: - print(log) - - container.stop() + try: + for attempt in range(EXEC_RUN_ATTEMPTS): + try: + _, logs = container.exec_run( + cmd='/usr/local/bin/allocator perf -c /etc/allocator/perf_config.json', + stream=True, + ) + for log in logs: + print(log) + break + except docker.errors.APIError as err: + container.reload() + if container.status in ("dead", "exited"): + logs = container.logs(tail=200).decode(errors="replace") + raise RuntimeError( + f"allocator container stopped before perf run, status={container.status}\n{logs}" + ) from err + if attempt == EXEC_RUN_ATTEMPTS - 1: + raise + finally: + container.remove(force=True) def run_session( diff --git a/test/allocator/analyze/render.py b/test/allocator/analyze/render.py index dbfdc57..efbe06d 100755 --- a/test/allocator/analyze/render.py +++ b/test/allocator/analyze/render.py @@ -39,7 +39,7 @@ def control_params_subplots(reports: List[Report], path: os.PathLike): 
ax.set_xlabel('Time, seconds') - # RSS plot + # RSS consumption plot. color = 'tab:red' l0 = ax.plot(df['elapsed_time'], df['rss'], color=color, label='RSS') ax.set_ylabel('RSS, bytes') @@ -47,14 +47,14 @@ def control_params_subplots(reports: List[Report], path: os.PathLike): ax.set_yticks([ml * 1024 * 1024 for ml in (256, 512, 512 + 256, 1024)]) ax.yaxis.set_major_formatter(bytes_major_formatter) - # GOGC plot + # GOGC plot. color = 'tab:blue' twin1 = ax.twinx() l1 = twin1.plot(df['elapsed_time'], df['gogc'], color=color, label='GOGC') twin1.set_ylabel('GOGC') twin1.set_ylim(-5, 105) - # Throttling plot + # Throttling plot. color = 'tab:green' twin2 = ax.twinx() twin2.spines.right.set_position(("axes", 1.2)) @@ -62,12 +62,12 @@ def control_params_subplots(reports: List[Report], path: os.PathLike): twin2.set_ylabel('Throttling') twin2.set_ylim(-5, 105) - # legend + # Legend. if not ls or not labels: ls = l0 + l1 + l2 labels = [l.get_label() for l in ls] - # title + # Title. if report.session.params.unlimited: title = "MemLimiter disabled" else: diff --git a/test/allocator/analyze/report.py b/test/allocator/analyze/report.py index abc7b4c..476b4c3 100644 --- a/test/allocator/analyze/report.py +++ b/test/allocator/analyze/report.py @@ -31,7 +31,7 @@ def __parse_tracker_stats(path: os.PathLike) -> pd.DataFrame: return df def __post_init__(self): - # Emulate OOM event for unconstrained process + # Emulate OOM event for unconstrained process. if self.session.params.unlimited: last_ts, last_but_one_ts = self.df['timestamp'].iloc[-1], self.df['timestamp'].iloc[-2] delta = last_ts - last_but_one_ts @@ -41,6 +41,6 @@ def __post_init__(self): 0, 0, 0, ] - # compute elapsed time + # Compute elapsed time. 
self.df['elapsed_time'] = (self.df['timestamp'] - self.df['timestamp'].min()).apply( lambda x: x.seconds + x.microseconds / 1000000) diff --git a/test/allocator/analyze/testing.py b/test/allocator/analyze/testing.py index ad7c461..f6bee36 100644 --- a/test/allocator/analyze/testing.py +++ b/test/allocator/analyze/testing.py @@ -52,7 +52,7 @@ def make_sessions(root_dir: os.PathLike) -> Iterable[Session]: Params(unlimited=False, load_duration=duration, rss_limit=GIGABYTE, coefficient=50), ) - # FIXME: remove after debug + # FIXME: Remove after debug. # cases = ( # Params(unlimited=True, load_duration="10s", rss_limit=GIGABYTE), # )