Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,11 @@ data/
!data/.gitignore

# Models (will be mounted as volume or downloaded)
# Keep the model directory structure but exclude large files
models/**/*.bin
models/**/*.safetensors
models/**/*.onnx
models/

# Logs
*.log
wandb/

# OS
.DS_Store
Expand All @@ -62,6 +60,8 @@ Thumbs.db
Dockerfile
docker-compose.yml
.dockerignore
triton_model_repo/
artifacts/

# CI/CD
.github/
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
run: |
uv venv
source .venv/bin/activate
uv pip install -e .
uv pip install -e ".[dev]"

- name: Run tests with pytest
run: |
Expand Down Expand Up @@ -72,7 +72,7 @@ jobs:
run: |
uv venv
source .venv/bin/activate
uv pip install -e .
uv pip install -e ".[dev]"
uv pip install ruff

- name: Check code formatting with ruff
Expand Down
15 changes: 6 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,12 @@ COPY pyproject.toml uv.lock* ./

# Install Python dependencies directly (not editable to avoid README requirement)
# Note: We'll use CPU-only PyTorch for Docker to reduce image size
# Split the install into separate steps so that committing one huge layer does not fail
RUN uv pip install --system --no-cache \
torch --index-url https://download.pytorch.org/whl/cpu

RUN uv pip install --system --no-cache \
torch --index-url https://download.pytorch.org/whl/cpu && \
uv pip install --system --no-cache \
transformers \
peft \
accelerate \
datasets \
evaluate \
scikit-learn \
seqeval \
fastapi \
uvicorn[standard] \
gradio \
Expand All @@ -37,7 +33,8 @@ RUN uv pip install --system --no-cache \
pandas \
tqdm \
onnx \
onnxruntime
onnxruntime \
tritonclient[http]

# Copy application code
COPY . .
Expand Down
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ invoice-ner/
├── pyproject.toml # Python project configuration & dependencies
├── setup.sh # Development environment setup script
├── .env.example # Environment variables template
├── uv.lock # Lock file for reproducible installs
├── data/ # Dataset and labeling tools
│ ├── app.py # Streamlit labeling application
Expand All @@ -35,28 +36,36 @@ invoice-ner/
│ └── test_labels.json # Test data labels
├── models/ # Model files and checkpoints
│ ├── artifacts/ # Exported models (ONNX, etc.)
│ └── layoutlmv3-lora-invoice-number/ # Fine-tuned LoRA adapter
│ ├── adapter_config.json
│ ├── adapter_model.safetensors
│ └── ...
├── triton_model_repo/ # Triton Inference Server model repository
│ └── ...
├── notebooks/ # Jupyter notebooks for experimentation
│ ├── 01_heuristics.ipynb # Heuristic-based extraction
│ ├── 02_labeling.ipynb # Data labeling analysis
│ ├── 03_inference.ipynb # Model inference testing
│ └── 04_postprocess.ipynb # Post-processing experiments
│ ├── 04_postprocess.ipynb # Post-processing experiments
│ └── 05_evaluations.ipynb # Evaluation metrics and analysis
├── benchmarks/ # Benchmarking suite
│ ├── models/ # Model wrappers (Gemini, ONNX, etc.)
│ ├── benchmark_results/ # Benchmark run results
│ ├── benchmark.py # Main benchmark script
│ └── README.md # Benchmarking documentation
├── scripts/ # Utility scripts
│ ├── preprocess.py # Data preprocessing utilities
│ └── train.py
│ ├── export_to_onnx.py # ONNX export script
│ ├── setup_triton_repo.py # Triton repo setup script
│ └── train.py # Model training script
├── src/ # Core application modules
│ ├── __init__.py # Package initialization
│ ├── __init__.py
│ ├── api.py # FastAPI endpoints
│ ├── gradio_ui.py # Gradio interface
│ ├── inference.py # Model inference logic
Expand Down
22 changes: 21 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@ services:
- .env
environment:
- PYTHONUNBUFFERED=1
- DEVICE=${DEVICE:-cpu}
- DEVICE=cpu
- LOG_LEVEL=${LOG_LEVEL:-info}
- INFERENCE_BACKEND=triton
- TRITON_URL=tritonserver:8000
depends_on:
- tritonserver
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:7860/health"]
Expand All @@ -36,6 +40,22 @@ services:
cpus: '${DOCKER_CPU_RESERVATION:-2}'
memory: ${DOCKER_MEMORY_RESERVATION:-4G}

tritonserver:
image: nvcr.io/nvidia/tritonserver:23.10-py3
container_name: triton-server
ports:
- "8000:8000"
- "8001:8001"
- "8002:8002"
volumes:
- ./triton_model_repo:/models
command: ["tritonserver", "--model-repository=/models"]
deploy:
resources:
limits:
cpus: '${TRITON_CPU_LIMIT:-4}'
memory: ${TRITON_MEMORY_LIMIT:-8G}

networks:
default:
name: invoice-ner-network
21 changes: 19 additions & 2 deletions docs/DEV_SETUP.md
Original file line number Diff line number Diff line change
Expand Up @@ -279,17 +279,34 @@ pre-commit autoupdate

### Docker Build

The Dockerfile has been optimized for inference, excluding heavy training dependencies (`peft`, `datasets`, etc.) to reduce image size.

```bash
# Build image
docker build -t invoice-ner:latest .

# Build with specific tag
docker build -t invoice-ner:v1.0.0 .

# Build with no cache
# Build with no cache (useful if you changed dependencies)
docker build --no-cache -t invoice-ner:latest .
```

### Running with Docker Compose

Docker Compose is the recommended way to run the application, as it starts and manages the model server (Triton) and the application service together.

```bash
# Start all services (detached mode)
docker-compose up -d --build

# View logs
docker-compose logs -f

# Stop all services
docker-compose down
```

### Production Deployment Considerations

1. **Use a reverse proxy** (nginx, Traefik) for SSL/TLS termination
Expand All @@ -309,7 +326,7 @@ FROM python:3.10-slim as builder
WORKDIR /app
COPY pyproject.toml uv.lock ./
RUN pip install uv && \
uv pip install --system --no-cache torch && \
uv pip install --system --no-cache torch --index-url https://download.pytorch.org/whl/cpu && \
uv pip install --system --no-cache -e .

# Runtime stage
Expand Down
24 changes: 14 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,11 @@ license = { text = "MIT" }
keywords = ["invoice", "ner", "layoutlmv3", "document-ai", "ocr"]

dependencies = [
"dvc>=3.63.0",
"ipykernel>=6.30.1",
"pandas>=2.3.2",
"pillow>=11.3.0",
"pre-commit>=4.3.0",
"streamlit>=1.49.1",
"tqdm>=4.67.1",
"torch",
"transformers",
"datasets",
"peft",
"accelerate",
"evaluate",
"scikit-learn",
"seqeval>=1.2.2",
"numpy>=1.24.0",
# Web app dependencies
"fastapi>=0.104.0",
Expand All @@ -36,6 +26,20 @@ dependencies = [
"onnxruntime-tools>=1.7.0",
"onnxconverter-common>=1.14.0",
"tritonclient[http]>=2.41.0",
]

[project.optional-dependencies]
dev = [
"dvc>=3.63.0",
"ipykernel>=6.30.1",
"pre-commit>=4.3.0",
"streamlit>=1.49.1",
"datasets",
"peft",
"accelerate",
"evaluate",
"scikit-learn",
"seqeval>=1.2.2",
# Testing dependencies
"pytest>=8.0.0",
"pytest-cov>=4.1.0",
Expand Down
Loading