Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
e12f2e1
feat: implement ErrorBoundary component and enhance error handling in…
CodeNinjaSarthak Mar 7, 2026
a276a14
feat: enhance layout and styling for ClustersPanel and QuestionsFeed,…
CodeNinjaSarthak Mar 7, 2026
2e3b055
feat: add Skeleton component and implement loading states across vari…
CodeNinjaSarthak Mar 7, 2026
3c4e7f1
feat: add QuotaBanner component and integrate quota alert handling in…
CodeNinjaSarthak Mar 7, 2026
5d53da4
feat: infrastructure hardening — WS subscriber refactor, DB pooling, …
CodeNinjaSarthak Mar 9, 2026
881b8c4
Merge pull request #1 from CodeNinjaSarthak:feat/backend-improvement
CodeNinjaSarthak Mar 9, 2026
0a5db93
frontend updation
CodeNinjaSarthak Mar 11, 2026
b87cb38
fix: improve WebSocket error handling and connection closure
CodeNinjaSarthak Mar 12, 2026
cec839f
Merge branch 'feat/frontend-improvements' into dev
CodeNinjaSarthak Mar 12, 2026
b8a5e77
refactor(clustering): replace batch KMeans with online nearest-centro…
CodeNinjaSarthak Mar 12, 2026
7554704
feat(dashboard): add representative questions, debounced WS refetch, …
CodeNinjaSarthak Mar 13, 2026
040a8e5
feat: add WebSocket event publishing to workers, mock YouTube polling…
CodeNinjaSarthak Mar 13, 2026
6cd3f2b
refactor(classification): add structured Gemini classification with c…
CodeNinjaSarthak Mar 15, 2026
f8e8b1d
Added .claude to .gitignore
CodeNinjaSarthak Mar 15, 2026
4624996
fix: change edit-answer route from POST to PATCH for proper REST sema…
CodeNinjaSarthak Mar 15, 2026
79fc971
test: add black-box pytest suite for API contracts and security bound…
CodeNinjaSarthak Mar 15, 2026
f01e437
fix: cleanup audit findings — WS event, config thresholds, model cons…
CodeNinjaSarthak Mar 15, 2026
bdc7fa0
chore: apply black + isort formatting
CodeNinjaSarthak Mar 15, 2026
fc52c84
merge: fix/codebase-cleanup into dev
CodeNinjaSarthak Mar 15, 2026
af39d14
fix: scope metrics endpoint to current teacher only
CodeNinjaSarthak Mar 15, 2026
4b40ef9
merge: fix/metrics-ownership into dev
CodeNinjaSarthak Mar 15, 2026
27f0c16
feat: add Prometheus metrics instrumentation with Grafana Cloud export
CodeNinjaSarthak Mar 15, 2026
73ebab3
merge: feat/prometheus-metrics into dev
CodeNinjaSarthak Mar 15, 2026
77de92b
fix: update alloy config to use sys.env() (deprecated env() in v1.7)
CodeNinjaSarthak Mar 15, 2026
8d5784b
fix: publish WS event after cluster title summarization and resolve l…
CodeNinjaSarthak Mar 15, 2026
79464cf
merge: fix/cluster-title-ws-event into dev
CodeNinjaSarthak Mar 15, 2026
ba18f99
feat: add silent JWT refresh with concurrent request dedup
CodeNinjaSarthak Mar 15, 2026
d1232c8
merge: feat/silent-jwt-refresh into dev
CodeNinjaSarthak Mar 15, 2026
8970b2f
feat: add HNSW indexes to comments and rag_documents embedding columns
CodeNinjaSarthak Mar 15, 2026
2e6c535
feat: recreate clusters centroid HNSW index with m=16 and ef_construc…
CodeNinjaSarthak Mar 15, 2026
e403a5b
feat: add circuit breaker to GeminiClient
CodeNinjaSarthak Mar 15, 2026
263251e
feat: wire gemini circuit breaker state to Prometheus
CodeNinjaSarthak Mar 15, 2026
a7b444f
merge: feature/gemini-circuit-breaker into dev
CodeNinjaSarthak Mar 15, 2026
c25caed
fix: restart cooldown on failed half_open probe in circuit breaker
CodeNinjaSarthak Mar 15, 2026
edcf70a
refactor: extract process_task() from main() in classification and em…
CodeNinjaSarthak Mar 15, 2026
75e5a5d
test: add contract-based worker tests (circuit breaker, queue, pipeline)
CodeNinjaSarthak Mar 15, 2026
6f59877
refactor: single-source queue constants from workers/common/queue.py
CodeNinjaSarthak Mar 16, 2026
e0c2ada
merge: refactor/queue-constants-single-source into dev
CodeNinjaSarthak Mar 16, 2026
d31128d
fix: worker reliability — atomic approval, redis publish safety, even…
CodeNinjaSarthak Mar 16, 2026
dbaede1
merge: fix/worker-reliability into dev
CodeNinjaSarthak Mar 16, 2026
5333d6b
fix: silent crashes — decrypt error handling, embedding cleanup, cons…
CodeNinjaSarthak Mar 16, 2026
8a60f64
merge: fix/silent-crashes into dev
CodeNinjaSarthak Mar 16, 2026
1ede6da
fix: frontend critical — auth error surfacing, stale closure, oauth l…
CodeNinjaSarthak Mar 16, 2026
ff5094b
merge: fix/frontend-critical into dev
CodeNinjaSarthak Mar 16, 2026
275e46f
fix: sanitize HTML in user inputs and fix OAuth state cleanup ordering
CodeNinjaSarthak Mar 18, 2026
1148eec
chore: fix all linting errors across ruff, flake8, and pylint
CodeNinjaSarthak Mar 18, 2026
72ca2de
merge: fix/security into dev
CodeNinjaSarthak Mar 18, 2026
e2ce9da
feat: implement Gemini-backed content moderation for comments and ans…
CodeNinjaSarthak Mar 18, 2026
9984cc2
merge: feature/content-moderation into dev
CodeNinjaSarthak Mar 18, 2026
c81908c
fix: scope RAG retrieval to current teacher's documents only
CodeNinjaSarthak Mar 18, 2026
15f0430
merge: fix/rag-teacher-scoping into dev
CodeNinjaSarthak Mar 18, 2026
d776cda
feat: implement scheduler worker with quota reset and token cleanup
CodeNinjaSarthak Mar 18, 2026
6b1a10d
merge: feature/scheduler-tasks into dev
CodeNinjaSarthak Mar 18, 2026
a1a4644
feat: add behaviour tests for RAG, WebSocket, moderation, and scheduler
CodeNinjaSarthak Mar 18, 2026
ce83a00
merge: feature/behaviour-tests into dev
CodeNinjaSarthak Mar 18, 2026
fc4e337
chore: remove dead scaffold files and improve README for GitHub readi…
CodeNinjaSarthak Mar 18, 2026
6106305
merge: chore/github-readiness-cleanup into dev
CodeNinjaSarthak Mar 18, 2026
8b911ae
fix: resolve live feed race conditions, cluster title wrapping, and m…
CodeNinjaSarthak Mar 18, 2026
d502f68
feat: rebrand UI theme with new fonts, orange accent palette, and ent…
CodeNinjaSarthak Mar 18, 2026
2d2cd86
merge: fix/live-feed-and-cluster-title into dev
CodeNinjaSarthak Mar 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ REDIS_DECODE_RESPONSES=true

# Security
SECRET_KEY=change-me-in-production-use-strong-random-key
# Fernet encryption key for OAuth tokens (must be 32+ characters)
ENCRYPTION_KEY=change-me-must-be-32-chars-padded!
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=30
REFRESH_TOKEN_EXPIRE_DAYS=7
Expand All @@ -40,6 +42,8 @@ YOUTUBE_REDIRECT_URI=http://localhost:8000/api/v1/youtube/auth/callback
# Rate Limiting
RATE_LIMIT_PER_MINUTE=60
RATE_LIMIT_BURST=10
# Toggle rate limiting on/off
RATE_LIMIT_ENABLED=true

# Quota Limits
DEFAULT_DAILY_ANSWER_LIMIT=100
Expand All @@ -51,6 +55,13 @@ QUEUE_CLASSIFICATION=classification
QUEUE_EMBEDDING=embedding
QUEUE_CLUSTERING=clustering
QUEUE_ANSWER_GENERATION=answer_generation
QUEUE_YOUTUBE_POSTING=youtube_posting

# Worker Thresholds
# Minimum confidence to forward a classified question to embedding
CLASSIFICATION_CONFIDENCE_THRESHOLD=0.4
# Minimum cosine similarity to join an existing cluster
CLUSTERING_SIMILARITY_THRESHOLD=0.65

# Logging
LOG_LEVEL=INFO
Expand All @@ -68,5 +79,27 @@ WEBSOCKET_TIMEOUT=300
WORKERS=classification,embeddings,clustering,answer_generation,trigger_monitor

GEMINI_API_KEY=your-gemini-api-key
# Gemini model for classification, answers, and summarization
GEMINI_MODEL=gemini-2.5-flash
# Gemini model for generating text embeddings
GEMINI_EMBEDDING_MODEL=gemini-embedding-001
# Number of questions needed before triggering cluster answer generation
CLUSTERING_THRESHOLD=5

# Mock / Testing
# Enable mock YouTube polling (no real API calls)
MOCK_YOUTUBE=false
# Seconds between mock YouTube messages
MOCK_MESSAGE_INTERVAL=2.0

# Frontend
# Absolute path to frontend/dist directory for static file serving
FRONTEND_DIR=

# Prometheus
PROMETHEUS_MULTIPROC_DIR=/tmp/prometheus_multiproc

# Grafana Cloud (Prometheus remote_write)
GCLOUD_HOSTED_METRICS_URL=
GCLOUD_HOSTED_METRICS_ID=
GCLOUD_RW_API_KEY=
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,8 @@ logs/
*.seed
tmp/
*.bak

CLAUDE.md
.claude

data-alloy
9 changes: 6 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ help:
@echo " make clean - Clean generated files"

run-backend:
cd backend && uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
cd backend && PYTHONPATH=$(CURDIR) uvicorn app.main:app --reload --host 0.0.0.0 --port 8000

run-workers:
python -m workers.runner
Expand All @@ -33,8 +33,11 @@ format:

lint:
ruff check backend workers scripts
flake8 backend workers scripts --max-line-length=119 --ignore=D107,D212,E501,W503,W605,D203,D100
pylint backend workers scripts --disable=line-too-long,trailing-whitespace,missing-function-docstring,consider-using-f-string,import-error,too-few-public-methods,redefined-outer-name
flake8 backend workers scripts --max-line-length=119 --ignore=D107,D212,E501,W503,W605,D203,D100 \
--per-file-ignores="backend/alembic/*:E402,F401 backend/app/main.py:E402,F824 backend/app/db/models/migrations/*:W391 workers/*/worker.py:E402,F824 workers/*/mock_worker.py:E402,F824 workers/runner.py:E402 scripts/*:E402,E226"
pylint backend workers scripts \
--ignore-paths="backend/alembic/versions/" \
--disable=line-too-long,trailing-whitespace,missing-function-docstring,missing-module-docstring,missing-class-docstring,consider-using-f-string,import-error,too-few-public-methods,redefined-outer-name,wrong-import-position,wrong-import-order,ungrouped-imports,invalid-name,logging-fstring-interpolation,global-statement,global-variable-not-assigned,unnecessary-pass,fixme,pointless-string-statement,broad-exception-caught,duplicate-code,too-many-locals,too-many-arguments,too-many-branches,too-many-statements,too-many-nested-blocks,too-many-instance-attributes,unused-argument,unused-import,unused-variable,no-member,import-outside-toplevel,raise-missing-from,not-callable,singleton-comparison,no-else-continue,implicit-str-concat,keyword-arg-before-vararg,missing-timeout,subprocess-run-check,protected-access

test:
pytest backend/tests workers -v
Expand Down
63 changes: 50 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ youtube_polling worker ──► Redis queue

Comments flow from YouTube → Redis workers → Gemini AI for classification and embedding → pgvector for semantic clustering → answer generation → real-time WebSocket delivery to the teacher dashboard (and optionally back to the stream).

## Features

- **Real-time question clustering** — student comments are embedded and clustered live using a nearest-centroid algorithm with milestone triggers
- **RAG-augmented answers** — AI-generated answers grounded in teacher-uploaded documents (PDF, DOCX, TXT)
- **YouTube integration** — polls live chat, posts answers directly back to YouTube
- **Content moderation** — Gemini-powered filtering before classification and before YouTube posting
- **WebSocket dashboard** — real-time updates with exponential backoff reconnection and 100-message cap
- **Teacher isolation** — every data endpoint enforces ownership; RAG retrieval is scoped per teacher
- **Observability** — Prometheus metrics, circuit breaker pattern on all Gemini calls, structured logging
- **Scheduled maintenance** — automatic daily quota reset and hourly expired token cleanup

## Quick Start

### Prerequisites
Expand Down Expand Up @@ -84,28 +95,47 @@ This starts PostgreSQL, Redis, the FastAPI backend, and all workers. The API is
cd backend && alembic upgrade head
```

## Running Without Docker
## Running Without Docker (Native Development)

**Prerequisites:**
- Python 3.13+
- Node.js 20+
- PostgreSQL 15+ with the [pgvector extension](https://github.com/pgvector/pgvector)
- Redis 7+

**Steps:**

1. **Clone and set up environment variables:**
```bash
cp .env.example .env.development
# Fill in your GEMINI_API_KEY, SECRET_KEY, ENCRYPTION_KEY, and YouTube OAuth credentials
```

**Backend:**
2. **Install backend dependencies:**
```bash
pip install -r backend/requirements.txt
uvicorn backend.app.main:app --reload
cd backend
python -m venv venv && source venv/bin/activate
pip install -r requirements.txt
```

**Workers:**
3. **Install frontend dependencies:**
```bash
python -m workers.classification.worker
python -m workers.embeddings.worker
python -m workers.clustering.worker
python -m workers.answer_generation.worker
python -m workers.trigger_monitor.worker
cd frontend && npm install
```

**Chrome extension:**
4. **Run database migrations:**
```bash
cd chrome-extension && npm install && npm run build
make migrate
```
Load `chrome-extension/dist` as an unpacked extension in Chrome.

5. **Start all services in one command:**
```bash
./start_dev.sh
```
This opens a tmux session with 9 panes: backend API, 6 AI workers, scheduler, and the Vite dev server.

6. **Open the app:**
Visit `http://localhost:5173`

## API

Expand All @@ -130,6 +160,13 @@ make lint # run linters
make test # run tests
```

## Known Limitations

- **No production deployment config** — docker-compose is development-oriented; nginx and production Dockerfile are not included
- **Chrome extension** — functional but not published to the Chrome Web Store
- **YouTube quota** — the YouTube Data API v3 has daily quota limits; high-traffic sessions may hit limits
- **Single-region** — no multi-region or horizontal scaling configuration

## License

MIT
22 changes: 22 additions & 0 deletions alloy/config.alloy
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Scrape Prometheus metrics from the service listening on localhost:8000
// (path /metrics, every 15 seconds) and forward the samples to the
// "grafana_cloud" remote_write component.
prometheus.scrape "fastapi" {
  targets = [{
    __address__ = "localhost:8000",
  }]
  metrics_path = "/metrics"
  scrape_interval = "15s"
  forward_to = [prometheus.remote_write.grafana_cloud.receiver]
}

// Ship received samples to a remote_write endpoint. The endpoint URL and the
// basic-auth credentials are read from environment variables at load time, so
// no secrets live in this file.
prometheus.remote_write "grafana_cloud" {
  endpoint {
    url = sys.env("GCLOUD_HOSTED_METRICS_URL")
    basic_auth {
      username = sys.env("GCLOUD_HOSTED_METRICS_ID")
      password = sys.env("GCLOUD_RW_API_KEY")
    }
  }
  // Labels attached to every series sent through this component.
  external_labels = {
    job = "ai_doubt_manager",
    environment = "development",
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""add hnsw indexes to comments and rag_documents embedding columns

Revision ID: 6f04ebe5f0fb
Revises: d4e5f6a7b8c9
Create Date: 2026-03-15 00:00:00.000000

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "6f04ebe5f0fb"
down_revision = "d4e5f6a7b8c9"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Rebuild the comments and rag_documents embedding indexes with cosine ops.

    Revision c3d4e5f6a7b8 already created indexes under these exact names using
    ``vector_l2_ops``. ``CREATE INDEX IF NOT EXISTS`` only checks the index
    *name*, so on any database that ran that revision the statements below
    would be skipped silently and the L2 indexes would stay in place. Each
    index is therefore dropped first and then recreated with
    ``vector_cosine_ops``.
    """
    # Drop first: the name may still be held by the older L2 index.
    op.execute("DROP INDEX IF EXISTS idx_comments_embedding_hnsw")
    op.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_comments_embedding_hnsw
        ON comments
        USING hnsw (embedding vector_cosine_ops)
        WITH (m = 16, ef_construction = 64)
        """
    )

    op.execute("DROP INDEX IF EXISTS idx_rag_documents_embedding_hnsw")
    op.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_rag_documents_embedding_hnsw
        ON rag_documents
        USING hnsw (embedding vector_cosine_ops)
        WITH (m = 16, ef_construction = 64)
        """
    )


def downgrade() -> None:
    """Return both embedding indexes to the state left by revision c3d4e5f6a7b8.

    At the parent revision these index names exist and use ``vector_l2_ops``
    (created by c3d4e5f6a7b8). Dropping them without recreating them would
    leave the schema without indexes that the earlier revision is supposed to
    provide, so each one is dropped and then rebuilt with the L2 operator
    class. Indexes are handled in the reverse order of ``upgrade``.
    """
    op.execute("DROP INDEX IF EXISTS idx_rag_documents_embedding_hnsw")
    op.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_rag_documents_embedding_hnsw
        ON rag_documents
        USING hnsw (embedding vector_l2_ops)
        WITH (m = 16, ef_construction = 64)
        """
    )

    op.execute("DROP INDEX IF EXISTS idx_comments_embedding_hnsw")
    op.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_comments_embedding_hnsw
        ON comments
        USING hnsw (embedding vector_l2_ops)
        WITH (m = 16, ef_construction = 64)
        """
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""recreate clusters centroid hnsw index with tuning params

Revision ID: 6fe440076f64
Revises: 6f04ebe5f0fb
Create Date: 2026-03-15 00:00:00.000000

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "6fe440076f64"
down_revision = "6f04ebe5f0fb"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Replace the legacy clusters centroid index with a renamed HNSW index.

    The old ``clusters_centroid_hnsw_idx`` is dropped, then
    ``idx_clusters_centroid_embedding_hnsw`` is created with explicit
    ``m`` / ``ef_construction`` build parameters.
    """
    drop_legacy_index = "DROP INDEX IF EXISTS clusters_centroid_hnsw_idx"
    create_tuned_index = """
CREATE INDEX IF NOT EXISTS idx_clusters_centroid_embedding_hnsw
ON clusters
USING hnsw (centroid_embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64)
"""
    # Order matters: remove the old index before building its replacement.
    for statement in (drop_legacy_index, create_tuned_index):
        op.execute(statement)


def downgrade() -> None:
    """Drop the renamed centroid index and recreate the legacy one.

    The recreated ``clusters_centroid_hnsw_idx`` carries no ``WITH`` clause,
    matching how the previous revision defined it.
    """
    drop_tuned_index = "DROP INDEX IF EXISTS idx_clusters_centroid_embedding_hnsw"
    recreate_legacy_index = """
CREATE INDEX IF NOT EXISTS clusters_centroid_hnsw_idx
ON clusters
USING hnsw (centroid_embedding vector_cosine_ops)
"""
    for statement in (drop_tuned_index, recreate_legacy_index):
        op.execute(statement)
35 changes: 35 additions & 0 deletions backend/alembic/versions/c3d4e5f6a7b8_add_hnsw_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""add_hnsw_indexes

Revision ID: c3d4e5f6a7b8
Revises: b2c3d4e5f6a7
Create Date: 2026-03-09 00:00:00.000000

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "c3d4e5f6a7b8"
down_revision = "b2c3d4e5f6a7"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create HNSW indexes (``vector_l2_ops``) on the two embedding columns.

    One index is built on ``comments.embedding`` and one on
    ``rag_documents.embedding``; both statements are skipped if an index with
    the same name already exists.
    """
    comments_index_sql = """
CREATE INDEX IF NOT EXISTS idx_comments_embedding_hnsw
ON comments
USING hnsw (embedding vector_l2_ops)
WITH (m = 16, ef_construction = 64)
"""
    rag_documents_index_sql = """
CREATE INDEX IF NOT EXISTS idx_rag_documents_embedding_hnsw
ON rag_documents
USING hnsw (embedding vector_l2_ops)
WITH (m = 16, ef_construction = 64)
"""
    for statement in (comments_index_sql, rag_documents_index_sql):
        op.execute(statement)


def downgrade() -> None:
    """Drop both embedding HNSW indexes, in the reverse order of ``upgrade``."""
    drop_statements = (
        "DROP INDEX IF EXISTS idx_rag_documents_embedding_hnsw",
        "DROP INDEX IF EXISTS idx_comments_embedding_hnsw",
    )
    for statement in drop_statements:
        op.execute(statement)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""add clusters centroid hnsw index for cosine similarity

Revision ID: d4e5f6a7b8c9
Revises: c3d4e5f6a7b8
Create Date: 2026-03-12 00:00:00.000000

"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "d4e5f6a7b8c9"
down_revision = "c3d4e5f6a7b8"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create a cosine-ops HNSW index on ``clusters.centroid_embedding``.

    The statement is skipped if ``clusters_centroid_hnsw_idx`` already exists.
    """
    create_centroid_index = """
CREATE INDEX IF NOT EXISTS clusters_centroid_hnsw_idx
ON clusters
USING hnsw (centroid_embedding vector_cosine_ops)
"""
    op.execute(create_centroid_index)


def downgrade() -> None:
    """Remove the clusters centroid HNSW index if it is present."""
    drop_centroid_index = "DROP INDEX IF EXISTS clusters_centroid_hnsw_idx"
    op.execute(drop_centroid_index)
Loading
Loading