diff --git a/.gitignore b/.gitignore index 88758ae..83e76bb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,8 @@ HELP.md -Dockerfile target/ .mvn/wrapper/maven-wrapper.jar !**/src/main/**/target/ !**/src/test/**/target/ -aplication.properties ### STS ### @@ -35,9 +33,10 @@ build/ ### VS Code ### .vscode/ -application.properties - +### Environment ### .env -Kixi -demo.iml -docker-compose.yml \ No newline at end of file +.env.local +.env.*.local + +### Application ### +application.properties diff --git a/Dockerfile b/Dockerfile index de142e4..5227e06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,12 @@ # Etapa 1: Build -FROM maven:3.9.6-eclipse-temurin-21-alpine AS build +FROM maven:3.9.6-eclipse-temurin-17-alpine AS build WORKDIR /app COPY services/backend-api/pom.xml . COPY services/backend-api/src ./src RUN mvn clean package -DskipTests # Etapa 2: Runtime -FROM eclipse-temurin:21-jre-alpine +FROM eclipse-temurin:17-jre-alpine WORKDIR /app COPY --from=build /app/target/*.jar app.jar EXPOSE 8080 diff --git a/conceptual/architecture/flows/@ocr.md b/conceptual/architecture/flows/@ocr.md new file mode 100644 index 0000000..2013aed --- /dev/null +++ b/conceptual/architecture/flows/@ocr.md @@ -0,0 +1,322 @@ +# Mapeamento OCR para Entidades do Banco de Dados + +Este documento descreve como os dados extraídos via OCR são mapeados para as entidades do banco de dados do sistema Kixi - Banco de Enunciados. + +## Visão Geral do Fluxo + +``` +Imagem/PDF → OCR Service (Python) → Backend API (Spring) → Banco de Dados (PostgreSQL) +``` + +### Exemplo de estrutura de prova + +``` +REPÚBLICA DE ANGOLA +GOVERNO DA PROVÍNCIA DE LUANDA +GABINETE PROVINCIAL DE EDUCAÇÃO +DEPARTAMENTO DE EDUCAÇÃO E ENSINO + +PROVA DE EXAME DE MATEMÁTICA +12ª Classe Ano Lectivo: 2024/2025 Duração: 90 Min. Série: B +CURSO: TODOS +``` + +### Exemplo de Cotação (Rodapé) + +``` +Cotação 1-a) 3 valores 2-) 4 valores 3-a) 2,5 valores 3-b) 2,5 valores 4-) 3 valores 5-a) 2,5 valores 5-b) 2,5 valores. 
+``` + +--- + +## Mapeamento: OCR → Entidades + +### 1. Statement (Enunciado) + +| Campo OCR | Campo Entidade | Tipo | Exemplo | +|-----------|----------------|------|---------| +| `examType` | `exam_type` | String | "Prova de Exame" | +| `durationMinutes` | `duration_minutes` | Integer | 90 | +| `variant` | `variant` | String | "B" | +| `title` | `title` | String | "Prova de Exame de Matemática 12ª Classe - Série B - 2024/2025" | +| `instructions` | `instructions` | String | "Leia com atenção, coloque na folha de prova..." | +| `totalMaxScore` | `total_max_score` | Double | 20.0 | +| `overallConfidence` | `ocr_confidence` | Double | 0.85 | +| `requestId` | `ocr_request_id` | String | "req-abc123xyz" | +| - | `source` | String | "ocr" (fixo) | +| - | `needs_review` | Boolean | true/false (baseado na confiança) | + +### 2. SchoolYear (Ano Letivo) + +| Campo OCR | Campo Entidade | Tipo | Exemplo | +|-----------|----------------|------|---------| +| `schoolYearStart` | `start_year` | Integer | 2024 | +| `schoolYearEnd` | `end_year` | Integer | 2025 | + +**Padrões de Extração:** +- `Ano Letivo: 2024/2025` +- `Ano Lectivo: 2024/2025` +- `2024/2025` +- `2024-2025` + +### 3. Subject (Disciplina) + +| Campo OCR | Campo Entidade | Tipo | Exemplo | +|-----------|----------------|------|---------| +| `subjectName` | `name` | String | "Matemática" | +| - | `code` | String | "MAT" (gerado) | +| - | `short_name` | String | "Matemática" (gerado) | + +**Padrões de Extração:** +- `PROVA DE EXAME DE MATEMÁTICA` +- `PROVA DE RECURSO DE FÍSICA` +- `EXAME DE QUÍMICA` + +**Normalização de Nomes:** +- "matematica" → "Matemática" +- "fisica" → "Física" +- "quimica" → "Química" +- "portugues" → "Português" +- (etc.) + +### 4. 
Course (Curso) + +| Campo OCR | Campo Entidade | Tipo | Exemplo | +|-----------|----------------|------|---------| +| `courseName` | `name` | String | "TODOS" | +| - | `code` | String | "TOD" (gerado) | + +**Padrões de Extração:** +- `CURSO: TODOS` +- `Curso: Ciências` + +### 5. Class (Turma) + +| Campo OCR | Campo Entidade | Tipo | Exemplo | +|-----------|----------------|------|---------| +| `classGrade` | `grade` | Integer | 12 | +| - | `code` | String | "12A-TOD" (gerado) | + +**Padrões de Extração:** +- `12ª Classe` +- `12º Ano` +- `10ª classe` + +### 6. Question (Questão) + +| Campo OCR | Campo Entidade | Tipo | Exemplo | +|-----------|----------------|------|---------| +| `number` | `number` | Integer | 1 | +| `text.value` | `text` | String | "Resolve a seguinte equação exponencial..." | +| `type` | `question_type` | String | "development" / "multiple_choice" | +| `cotacao` | `max_score` | Double | 3.0 | +| `confidence` | `ocr_confidence` | Double | 0.9 | +| `pageIndex` | `page_index` | Integer | 0 | +| - | `order_index` | Integer | 0 (sequencial) | +| - | `needs_review` | Boolean | true/false (baseado na confiança) | + +**Mapeamento de Tipos:** +| Tipo OCR | Tipo BD | +|----------|---------| +| `dissertativa` | `development` | +| `multipla_escolha` | `multiple_choice` | +| `unknown` | `unknown` | + +--- + +## Extração de Cotação + +### Formato Angolano + +A cotação geralmente aparece no final da prova no formato: + +``` +Cotação 1-a) 3 valores 2-) 4 valores 3-a) 2,5 valores 3-b) 2,5 valores 4-) 3 valores 5-a) 2,5 valores 5-b) 2,5 valores. +``` + +### Padrões Suportados + +1. **Questão com subitem:** `1-a) 3 valores` +2. **Questão sem subitem:** `2-) 4 valores` +3. **Valores decimais:** `3-b) 2,5 valores` +4. 
**Formato alternativo:** `(3 valores)` no final da questão + +### Mapeamento Interno + +O sistema cria um mapa de cotação: + +```json +{ + "1a": 3.0, + "2": 4.0, + "3a": 2.5, + "3b": 2.5, + "4": 3.0, + "5a": 2.5, + "5b": 2.5 +} +``` + +E depois associa a cada questão: +- Se a questão tem subitems, soma as cotações dos subitems +- Se não tem subitems, usa a cotação direta + +--- + +## Endpoints para Teste + +### Python OCR Service (porta 8000) + +```bash +# Health check +curl http://localhost:8000/ocr/health + +# Extração simples +curl -X POST http://localhost:8000/ocr/v1/extract/simple \ + -F "image=@prova.jpg" + +# Extração completa (múltiplas imagens/PDF) +curl -X POST http://localhost:8000/ocr/v1/extract \ + -F "images=@prova.pdf" + +# Idiomas suportados +curl http://localhost:8000/ocr/v1/supported-languages +``` + +### Spring Backend API (porta 8080) + +```bash +# Health check +curl http://localhost:8080/api/v1/ocr/health + +# Extração simples +curl -X POST http://localhost:8080/api/v1/ocr/extract/single \ + -F "file=@prova.jpg" + +# Extração de exame estruturado +curl -X POST http://localhost:8080/api/v1/ocr/extract/exam \ + -F "files=@prova.pdf" + +# Extração e persistência no banco +curl -X POST "http://localhost:8080/api/v1/ocr/extract-and-persist?createdBy=1" \ + -F "files=@prova.pdf" +``` + +--- + +## Resposta JSON de Exemplo + +### Extração de Exame (`/api/v1/ocr/extract/exam`) + +```json +{ + "exam_type": "Prova de Exame", + "duration_minutes": 90, + "variant": "B", + "title": "Prova de Exame de Matemática 12ª Classe - Série B - 2024/2025", + "instructions": "Leia a prova com atenção...", + "school_year_start": 2024, + "school_year_end": 2025, + "class_grade": "12", + "course_name": "TODOS", + "subject_name": "Matemática", + "total_max_score": 20.0, + "questions": [ + { + "number": "1", + "subitems": ["a)"], + "text": "Resolve a seguinte equação exponencial...", + "type": "dissertativa", + "cotacao": 3.0, + "options": null, + "has_image": true, + 
"image_description": "Expressão matemática complexa", + "confidence": 0.85 + }, + { + "number": "2", + "subitems": [], + "text": "Em um meio de cultura especial, a quantidade de bactérias...", + "type": "dissertativa", + "cotacao": 4.0, + "options": null, + "has_image": false, + "confidence": 0.9 + } + ], + "images_to_upload": [ + { + "suggested_filename": "prova-matematica-2024-2025-serie-b-cabecalho.png", + "description": "Cabeçalho oficial com brasão/logo institucional", + "region": "cabecalho" + }, + { + "suggested_filename": "prova-matematica-2024-2025-serie-b-questao-1.png", + "description": "Expressão matemática complexa", + "region": "questao_1" + } + ], + "request_id": "req-abc123xyz", + "processing_time_ms": 2500, + "overall_confidence": 0.87, + "needs_review": false, + "warnings": [] +} +``` + +--- + +## Script de Teste + +Use o script `test-ocr.sh` na raiz do projeto: + +```bash +# Ver ajuda +./test-ocr.sh --help + +# Verificar saúde dos serviços +./test-ocr.sh --check + +# Testar extração de exame +./test-ocr.sh -e prova.jpg + +# Testar todos os endpoints +./test-ocr.sh prova.pdf +``` + +--- + +## Troubleshooting + +### Cotação não extraída + +1. Verifique se a cotação está no formato esperado +2. A cotação deve estar no final da prova +3. Formatos suportados: `X valores`, `X pontos`, `X pts` + +### Disciplina não reconhecida + +1. Verifique se o nome está na lista de normalização +2. O padrão `PROVA DE [EXAME|RECURSO] DE ` é prioritário + +### Baixa confiança + +1. Melhore a qualidade da imagem (resolução mínima: 150 DPI) +2. Evite imagens com muito ruído ou anotações manuscritas +3. 
PDFs digitais têm melhor resultado que fotos + +--- + +## Formatos Suportados + +| Formato | Extensões | Notas | +|---------|-----------|-------| +| JPEG | .jpg, .jpeg | Fotos de provas | +| PNG | .png | Scans de alta qualidade | +| PDF | .pdf | Multipáginas suportado | +| WebP | .webp | Compressão moderna | +| BMP | .bmp | Sem compressão | +| TIFF | .tiff, .tif | Scans profissionais | + +**Limite de tamanho:** 20MB por arquivo +**Máximo de arquivos:** 10 por requisição \ No newline at end of file diff --git a/conceptual/architecture/implementation-guides/auth-jwt-rbac-oauth2.md b/conceptual/architecture/implementation-guides/auth-jwt-rbac-oauth2.md new file mode 100644 index 0000000..1b80459 --- /dev/null +++ b/conceptual/architecture/implementation-guides/auth-jwt-rbac-oauth2.md @@ -0,0 +1,91 @@ +# Implementation Guide: Authentication and Authorization (JWT + RBAC + Google OAuth2) + +**Status:** Proposed for implementation + +Sistema de autenticação e autorização seguro com JWT, RBAC (Role e AccountRole) e login via Google OAuth2. + +--- + +## 1. Visão geral + +- **Login tradicional:** `POST /api/v1/auth/login` (username ou email + password) → valida contra `Account` (BCrypt), carrega roles via `AccountRole`, emite JWT. +- **Login Google:** utilizador é redirecionado para o Google → callback com `code` → backend troca por access token, obtém email/nome, encontra ou cria `Account` e `User`, atribui role padrão se novo → emite o nosso JWT. +- **Rotas:** públicas = `/api/v1/auth/**`; privadas = resto da API, com filtro JWT e opcionalmente RBAC por endpoint. + +--- + +## 2. Fluxos + +### 2.1 Login tradicional + +1. Cliente envia `POST /api/v1/auth/login` com `{ "usernameOrEmail": "...", "password": "..." }`. +2. Backend resolve account por username ou email, verifica password com BCrypt, verifica conta ativa. +3. Carrega roles do account via `AccountRole` + `Role`. +4. Gera JWT com claims: `sub` (accountId), `roles` (lista de nomes), `exp` (expiração). +5. 
Resposta: `{ "accessToken": "...", "tokenType": "Bearer", "expiresAt": "...", "accountId": 1, "roles": ["ADMIN"] }`. + +### 2.2 Login Google OAuth2 + +1. Cliente acede a `GET /api/v1/auth/google` → backend redireciona para Google (URL de autorização com `client_id`, `redirect_uri`, `scope=openid email profile`). +2. Utilizador autentica-se no Google; Google redireciona para `GET /api/v1/auth/google/callback?code=...`. +3. Backend troca `code` por access_token (POST ao token endpoint do Google com client_id, client_secret, code, redirect_uri). +4. Backend chama userinfo do Google (email, name) e procura `Account` por email. +5. Se não existir: cria `Account` (email, username derivado do email, sem password ou password aleatório), cria `User` (firstName/lastName a partir do name), atribui role padrão (ex.: USER) via `AccountRole`. +6. Se existir: garante que tem pelo menos um role. +7. Emite JWT (mesmo formato do login tradicional) e devolve (ex.: redirect para frontend com token em query ou cookie, ou JSON no body). + +--- + +## 3. Componentes + +| Componente | Responsabilidade | +|------------|------------------| +| **JwtService** | Gerar JWT (accountId, roles, exp), validar token, extrair claims. | +| **AuthController** | Endpoints: login, redirect Google, callback Google. | +| **AuthService** | Validar credenciais (username/email + password), carregar roles, orquestrar login e callback Google. | +| **SecurityWebFilterChain** | Permitir `/api/v1/auth/**`, exigir autenticação no resto; filtro que lê `Authorization: Bearer `, valida JWT e preenche SecurityContext (principal = accountId, authorities = roles). | +| **RBAC** | Em endpoints protegidos: ler roles do SecurityContext; opcionalmente `@PreAuthorize("hasRole('ADMIN')")` ou verificação manual. | + +--- + +## 4. Entidades envolvidas + +- **Account:** username, email, passwordHash, emailVerified, active, lastLogin (já existente). 
+- **User:** accountId, firstName, lastName, photo (já existente; perfil ligado ao Account). +- **Role / AccountRole:** N:N já implementado; roles do account usados no JWT e na autorização. + +--- + +## 5. Configuração + +- **application.properties / env:** + - `jwt.secret` (base64 ou string), `jwt.expiration-ms` + - `google.client-id`, `google.client-secret`, `app.auth.google.redirect-uri` (ex.: `http://localhost:8080/api/v1/auth/google/callback`) +- **Google Cloud Console:** criar credenciais OAuth 2.0 (tipo “Web application”), definir redirect URI igual ao configurado. + +--- + +## 6. Rotas públicas vs privadas + +- **Públicas (sem JWT):** + - `POST /api/v1/auth/login` + - `GET /api/v1/auth/google` (redirect) + - `GET /api/v1/auth/google/callback` + - Opcional: health, actuator, docs. +- **Privadas:** todas as outras sob `/api/v1/**`; filtro JWT obrigatório; RBAC por endpoint conforme necessário. + +--- + +## 7. Erros + +- **401 Unauthorized:** token em falta, inválido ou expirado. +- **403 Forbidden:** token válido mas sem permissão (role insuficiente). +- Mensagens claras em JSON (ex.: ProblemDetail) para o cliente. + +--- + +## 8. Testes + +- Unitários: JwtService (gerar/validar), AuthService (validação de password, carga de roles). +- Integração: login tradicional, callback Google (mock do token endpoint e userinfo), acesso a rota protegida com/sem token e com/sem role. +- Cenários: token expirado (401), role insuficiente (403). 
diff --git a/conceptual/architecture/implementation-guides/crud-flux.md b/conceptual/architecture/implementation-guides/crud-flux.md index f7598f6..b9f200a 100644 --- a/conceptual/architecture/implementation-guides/crud-flux.md +++ b/conceptual/architecture/implementation-guides/crud-flux.md @@ -1,7 +1,3 @@ -Here’s a full English version of your CRUD implementation guide, adapted for the entities you listed: - ---- - # Reactive CRUD Implementation Guide for the Project This guide details the standard pattern for implementing CRUD operations in the backend located at `services/backend-api`, using the `schoolYears` CRUD as a reference. To implement any new CRUD, replace `[EntityName]` with the name of your entity and refer to the `schoolYears` CRUD files for practical examples. diff --git a/docker-compose.yml b/docker-compose.yml index c15a1bd..bcc9e55 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,6 +13,79 @@ services: - ./services/backend-api/docker/postgres/init.sql:/docker-entrypoint-initdb.d/init.sql networks: - kixi_network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d kixi_db"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + networks: + - kixi_network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres -d kixi_db"] + interval: 10s + timeout: 5s + retries: 5 + + kixi_backend: + build: + context: . + dockerfile: Dockerfile + container_name: kixi_backend + environment: + DB_HOST: kixi_postgres + DB_PORT: 5432 + DB_NAME: kixi_db + DB_USERNAME: postgres + DB_PASSWORD: postgres + GROQ_API_KEY: ${GROQ_API_KEY} + GROQ_MODEL: ${GROQ_MODEL:-llama-3.3-70b-versatile} + ports: + - "8080:8080" + networks: + - kixi_network + restart: unless-stopped + + kixi_ocr: + build: + context: ./services/ocr-service + dockerfile: ../../infra/docker/ocr.Dockerfile + container_name: kixi_ocr + ports: + - "8000:8000" + networks: + - kixi_network + restart: unless-stopped + + kixi_backend: + build: + context: . 
+ dockerfile: Dockerfile + container_name: kixi_backend + environment: + DB_HOST: kixi_postgres + DB_PORT: 5432 + DB_NAME: kixi_db + DB_USERNAME: postgres + DB_PASSWORD: postgres + GROQ_API_KEY: ${GROQ_API_KEY} + GROQ_MODEL: ${GROQ_MODEL:-llama-3.3-70b-versatile} + ports: + - "8080:8080" + networks: + - kixi_network + restart: unless-stopped + + kixi_ocr: + build: + context: ./services/ocr-service + dockerfile: ../../infra/docker/ocr.Dockerfile + container_name: kixi_ocr + ports: + - "8000:8000" + networks: + - kixi_network + restart: unless-stopped volumes: kixi_postgres_data: diff --git a/infra/docker/backend.Dockerfile b/infra/docker/backend.Dockerfile index b51ab4c..7c986cc 100644 --- a/infra/docker/backend.Dockerfile +++ b/infra/docker/backend.Dockerfile @@ -1,14 +1,86 @@ +# Backend API Dockerfile +# +# Multi-stage build for the Kixi Backend API (Spring Boot WebFlux) +# Optimized for production deployments with minimal image size +# +# Build from project root: +# docker build -f infra/docker/backend.Dockerfile -t kixi-backend-api . +# +# Or using docker-compose (recommended): +# docker-compose up --build backend-api + +# ============================================================================= +# Stage 1: Builder +# ============================================================================= FROM eclipse-temurin:17-jdk-jammy AS build + WORKDIR /app -COPY mvnw . -COPY .mvn .mvn -COPY pom.xml . + +# Copy Maven wrapper and configuration from services/backend-api +COPY services/backend-api/mvnw . +COPY services/backend-api/.mvn .mvn +COPY services/backend-api/pom.xml . 
+ +# Make Maven wrapper executable RUN chmod +x mvnw + +# Download dependencies (cached layer) RUN ./mvnw dependency:go-offline -B -COPY src ./src + +# Copy source code from services/backend-api +COPY services/backend-api/src ./src + +# Build the application (skip tests for faster builds) RUN ./mvnw clean package -DskipTests -FROM eclipse-temurin:17-jre-jammy + +# ============================================================================= +# Stage 2: Runtime +# ============================================================================= +FROM eclipse-temurin:17-jre-jammy AS runtime + +# Labels +LABEL maintainer="Kixi Team " \ + org.opencontainers.image.title="Kixi Backend API" \ + org.opencontainers.image.description="Spring Boot WebFlux Backend API for the Kixi platform" \ + org.opencontainers.image.version="0.0.1-SNAPSHOT" \ + org.opencontainers.image.vendor="Creative Mode" \ + org.opencontainers.image.source="https://github.com/creative-mode/kixi" + +# Set environment variables +ENV JAVA_OPTS="-XX:+UseContainerSupport -XX:MaxRAMPercentage=75.0" \ + SPRING_PROFILES_ACTIVE=docker \ + SERVER_PORT=8080 \ + TZ=Africa/Luanda + +# Install useful tools +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + tzdata \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Create non-root user for security +RUN groupadd --gid 1000 spring && \ + useradd --uid 1000 --gid spring --shell /bin/bash --create-home spring + +# Set working directory WORKDIR /app + +# Copy the built JAR from builder stage COPY --from=build /app/target/demo-0.0.1-SNAPSHOT.jar app.jar + +# Change ownership to non-root user +RUN chown -R spring:spring /app + +# Switch to non-root user +USER spring + +# Expose port EXPOSE 8080 -ENTRYPOINT ["java","-jar","app.jar"] \ No newline at end of file + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:8080/actuator/health || exit 1 + +# Run the application 
+ENTRYPOINT ["sh", "-c", "java $JAVA_OPTS -jar app.jar"] diff --git a/infra/docker/ocr.Dockerfile b/infra/docker/ocr.Dockerfile index 66956c5..79d1641 100644 --- a/infra/docker/ocr.Dockerfile +++ b/infra/docker/ocr.Dockerfile @@ -6,7 +6,7 @@ WORKDIR /app RUN apt-get update && apt-get install -y \ tesseract-ocr \ tesseract-ocr-por \ - libgl1-mesa-glx \ + libgl1 \ libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* diff --git a/services/backend-api/.env.example b/services/backend-api/.env.example index c83bd44..c749a33 100644 --- a/services/backend-api/.env.example +++ b/services/backend-api/.env.example @@ -3,4 +3,7 @@ DB_USER=postgres DB_PASSWORD=postgres DB_HOST=postgres DB_PORT=5432 -SPRING_PROFILES_ACTIVE=docker \ No newline at end of file +SPRING_PROFILES_ACTIVE=docker +GROQ_API_KEY=your_groq_api_key_here +GROQ_API_URL=https://api.groq.com/openai/v1 +GROQ_MODEL=openai/gpt-oss-120b diff --git a/services/backend-api/docker/postgres/init.sql b/services/backend-api/docker/postgres/init.sql index 6889a47..33f11b6 100644 --- a/services/backend-api/docker/postgres/init.sql +++ b/services/backend-api/docker/postgres/init.sql @@ -1,9 +1,369 @@ -CREATE DATABASE creativemode; +-- ============================================================================= +-- Kixi Database Initialization Script +-- ============================================================================= +-- This script creates all required tables for the Kixi platform. +-- It should be idempotent (safe to run multiple times). 
+-- ============================================================================= -CREATE TABLE anos_letivos ( +-- Create database if not exists (handled by Docker) +-- CREATE DATABASE kixi; + +-- ============================================================================= +-- School Years Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS school_years ( + id BIGSERIAL PRIMARY KEY, + start_year INTEGER NOT NULL, + end_year INTEGER NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + CONSTRAINT uk_school_years UNIQUE (start_year, end_year), + CONSTRAINT ck_school_years_interval CHECK (end_year > start_year) +); + +-- ============================================================================= +-- Terms Table (Trimesters/Periods) +-- ============================================================================= +CREATE TABLE IF NOT EXISTS terms ( + id BIGSERIAL PRIMARY KEY, + number INTEGER NOT NULL, + name VARCHAR(100) NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + CONSTRAINT uk_terms_number UNIQUE (number) +); + +-- ============================================================================= +-- Subjects Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS subjects ( + id BIGSERIAL PRIMARY KEY, + code VARCHAR(20) NOT NULL, + name VARCHAR(200) NOT NULL, + short_name VARCHAR(50), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + CONSTRAINT uk_subjects_code UNIQUE (code) +); + +-- ============================================================================= +-- Courses Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS courses ( + id 
BIGSERIAL PRIMARY KEY, + code VARCHAR(20) NOT NULL, + name VARCHAR(200) NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + CONSTRAINT uk_courses_code UNIQUE (code) +); + +-- ============================================================================= +-- Classes Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS classes ( + id BIGSERIAL PRIMARY KEY, + code VARCHAR(50), + grade INTEGER NOT NULL, + course_id BIGINT REFERENCES courses(id), + school_year_id BIGINT REFERENCES school_years(id), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_classes_course ON classes(course_id); +CREATE INDEX IF NOT EXISTS idx_classes_school_year ON classes(school_year_id); + +-- ============================================================================= +-- Accounts Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS accounts ( + id BIGSERIAL PRIMARY KEY, + username VARCHAR(100) NOT NULL, + email VARCHAR(255) NOT NULL, + password_hash VARCHAR(255) NOT NULL, + email_verified BOOLEAN DEFAULT FALSE, + last_login TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + CONSTRAINT uk_accounts_username UNIQUE (username), + CONSTRAINT uk_accounts_email UNIQUE (email) +); + +-- ============================================================================= +-- Users Table (Profile information) +-- ============================================================================= +CREATE TABLE IF NOT EXISTS users ( + id BIGSERIAL PRIMARY KEY, + account_id BIGINT NOT NULL REFERENCES accounts(id), + first_name VARCHAR(100), + last_name VARCHAR(100), + photo VARCHAR(500), + created_at TIMESTAMP DEFAULT 
CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + CONSTRAINT uk_users_account UNIQUE (account_id) +); + +-- ============================================================================= +-- Roles Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS roles ( + id BIGSERIAL PRIMARY KEY, + name VARCHAR(50) NOT NULL, + description VARCHAR(255), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + CONSTRAINT uk_roles_name UNIQUE (name) +); + +-- ============================================================================= +-- Account Roles Table (Many-to-Many) +-- ============================================================================= +CREATE TABLE IF NOT EXISTS account_roles ( + account_id BIGINT NOT NULL REFERENCES accounts(id), + role_id BIGINT NOT NULL REFERENCES roles(id), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + PRIMARY KEY (account_id, role_id) +); + +-- ============================================================================= +-- Sessions Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS sessions ( + id BIGSERIAL PRIMARY KEY, + account_id BIGINT NOT NULL REFERENCES accounts(id), + token VARCHAR(500) NOT NULL, + ip_address VARCHAR(50), + expires_at TIMESTAMP NOT NULL, + last_used TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_sessions_account ON sessions(account_id); +CREATE INDEX IF NOT EXISTS idx_sessions_token ON sessions(token); + +-- ============================================================================= +-- Statements Table (Exam Papers) +-- ============================================================================= +CREATE TABLE IF NOT 
EXISTS statements ( + id BIGSERIAL PRIMARY KEY, + exam_type VARCHAR(100), + duration_minutes INTEGER, + variant VARCHAR(10), + title VARCHAR(500), + instructions TEXT, + total_max_score DECIMAL(10, 2), + school_year_id BIGINT REFERENCES school_years(id), + term_id BIGINT REFERENCES terms(id), + subject_id BIGINT REFERENCES subjects(id), + class_id BIGINT REFERENCES classes(id), + course_id BIGINT REFERENCES courses(id), + created_by BIGINT REFERENCES accounts(id), + visible BOOLEAN DEFAULT FALSE, + needs_review BOOLEAN DEFAULT FALSE, + ocr_confidence DECIMAL(5, 4), + ocr_request_id VARCHAR(100), + source VARCHAR(50) DEFAULT 'manual', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_statements_school_year ON statements(school_year_id); +CREATE INDEX IF NOT EXISTS idx_statements_term ON statements(term_id); +CREATE INDEX IF NOT EXISTS idx_statements_subject ON statements(subject_id); +CREATE INDEX IF NOT EXISTS idx_statements_class ON statements(class_id); +CREATE INDEX IF NOT EXISTS idx_statements_created_by ON statements(created_by); +CREATE INDEX IF NOT EXISTS idx_statements_visible ON statements(visible); +CREATE INDEX IF NOT EXISTS idx_statements_needs_review ON statements(needs_review); +CREATE INDEX IF NOT EXISTS idx_statements_source ON statements(source); +CREATE INDEX IF NOT EXISTS idx_statements_deleted_at ON statements(deleted_at); + +-- ============================================================================= +-- Questions Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS questions ( + id BIGSERIAL PRIMARY KEY, + statement_id BIGINT NOT NULL REFERENCES statements(id) ON DELETE CASCADE, + number INTEGER NOT NULL, + text TEXT NOT NULL, + question_type VARCHAR(50) NOT NULL DEFAULT 'unknown', + max_score DECIMAL(10, 2), + order_index INTEGER, + ocr_confidence DECIMAL(5, 4), + 
page_index INTEGER DEFAULT 0, + needs_review BOOLEAN DEFAULT FALSE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + CONSTRAINT uk_questions_statement_number UNIQUE (statement_id, number) +); + +CREATE INDEX IF NOT EXISTS idx_questions_statement ON questions(statement_id); +CREATE INDEX IF NOT EXISTS idx_questions_type ON questions(question_type); +CREATE INDEX IF NOT EXISTS idx_questions_deleted_at ON questions(deleted_at); + +-- ============================================================================= +-- Question Images Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS question_images ( + id BIGSERIAL PRIMARY KEY, + question_id BIGINT NOT NULL REFERENCES questions(id) ON DELETE CASCADE, + image_url VARCHAR(1000) NOT NULL, + caption VARCHAR(500), + order_index INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_question_images_question ON question_images(question_id); + +-- ============================================================================= +-- Question Options Table (Multiple Choice Answers) +-- ============================================================================= +CREATE TABLE IF NOT EXISTS question_options ( id BIGSERIAL PRIMARY KEY, - ano_inicio INT NOT NULL, - ano_fim INT NOT NULL, - CONSTRAINT uk_anos_letivos UNIQUE (ano_inicio, ano_fim), - CONSTRAINT ck_anos_letivos_intervalo CHECK (ano_fim > ano_inicio) + question_id BIGINT NOT NULL REFERENCES questions(id) ON DELETE CASCADE, + option_label VARCHAR(10) NOT NULL, + option_text TEXT NOT NULL, + is_correct BOOLEAN DEFAULT FALSE, + order_index INTEGER DEFAULT 0, + ocr_confidence DECIMAL(5, 4), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + 
CONSTRAINT uk_question_options_label UNIQUE (question_id, option_label) ); + +CREATE INDEX IF NOT EXISTS idx_question_options_question ON question_options(question_id); +CREATE INDEX IF NOT EXISTS idx_question_options_deleted_at ON question_options(deleted_at); + +-- ============================================================================= +-- Simulations Table (Student Exam Attempts) +-- ============================================================================= +CREATE TABLE IF NOT EXISTS simulations ( + id BIGSERIAL PRIMARY KEY, + account_id BIGINT NOT NULL REFERENCES accounts(id), + statement_id BIGINT NOT NULL REFERENCES statements(id), + school_year_id BIGINT REFERENCES school_years(id), + started_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + finished_at TIMESTAMP, + time_spent_seconds INTEGER, + final_score DECIMAL(10, 2), + status VARCHAR(50) DEFAULT 'in_progress', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_simulations_account ON simulations(account_id); +CREATE INDEX IF NOT EXISTS idx_simulations_statement ON simulations(statement_id); +CREATE INDEX IF NOT EXISTS idx_simulations_status ON simulations(status); + +-- ============================================================================= +-- Simulation Answers Table +-- ============================================================================= +CREATE TABLE IF NOT EXISTS simulation_answers ( + id BIGSERIAL PRIMARY KEY, + simulation_id BIGINT NOT NULL REFERENCES simulations(id) ON DELETE CASCADE, + question_id BIGINT NOT NULL REFERENCES questions(id), + selected_option_id BIGINT REFERENCES question_options(id), + answer_text TEXT, + score_obtained DECIMAL(10, 2), + is_correct BOOLEAN, + answered_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP +); + +CREATE 
INDEX IF NOT EXISTS idx_simulation_answers_simulation ON simulation_answers(simulation_id); +CREATE INDEX IF NOT EXISTS idx_simulation_answers_question ON simulation_answers(question_id); + +-- ============================================================================= +-- Default Data +-- ============================================================================= + +-- Insert default roles +INSERT INTO roles (name, description) VALUES + ('ADMIN', 'System administrator with full access'), + ('TEACHER', 'Teacher with access to create and manage statements'), + ('STUDENT', 'Student with access to view statements and take simulations') +ON CONFLICT (name) DO NOTHING; + +-- Insert default terms +INSERT INTO terms (number, name) VALUES + (1, '1º Trimestre'), + (2, '2º Trimestre'), + (3, '3º Trimestre') +ON CONFLICT (number) DO NOTHING; + +-- Insert sample subjects +INSERT INTO subjects (code, name, short_name) VALUES + ('MAT', 'Matemática', 'Mat'), + ('PORT', 'Língua Portuguesa', 'Port'), + ('FIS', 'Física', 'Fís'), + ('QUIM', 'Química', 'Quím'), + ('BIO', 'Biologia', 'Bio'), + ('HIST', 'História', 'Hist'), + ('GEO', 'Geografia', 'Geo'), + ('ING', 'Inglês', 'Ing'), + ('FIL', 'Filosofia', 'Fil') +ON CONFLICT (code) DO NOTHING; + +-- Insert sample school year +INSERT INTO school_years (start_year, end_year) VALUES + (2024, 2025) +ON CONFLICT (start_year, end_year) DO NOTHING; + +-- ============================================================================= +-- Functions and Triggers +-- ============================================================================= + +-- Function to update updated_at timestamp +CREATE OR REPLACE FUNCTION update_updated_at_column() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = CURRENT_TIMESTAMP; + RETURN NEW; +END; +$$ language 'plpgsql'; + +-- Apply trigger to all tables with updated_at column +DO $$ +DECLARE + t text; +BEGIN + FOR t IN + SELECT table_name FROM information_schema.columns + WHERE column_name = 'updated_at' + AND 
table_schema = 'public' + LOOP + EXECUTE format(' + DROP TRIGGER IF EXISTS update_%I_updated_at ON %I; + CREATE TRIGGER update_%I_updated_at + BEFORE UPDATE ON %I + FOR EACH ROW + EXECUTE FUNCTION update_updated_at_column(); + ', t, t, t, t); + END LOOP; +END; +$$ language 'plpgsql'; + +-- ============================================================================= +-- Grant Permissions (if needed for specific users) +-- ============================================================================= +-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO kixi; +-- GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO kixi; diff --git a/services/backend-api/pom.xml b/services/backend-api/pom.xml index 9f7c68e..ed65686 100644 --- a/services/backend-api/pom.xml +++ b/services/backend-api/pom.xml @@ -1,8 +1,10 @@ - - + + 4.0.0 @@ -10,7 +12,7 @@ org.springframework.boot spring-boot-starter-parent 3.2.1 - + com.example @@ -30,6 +32,12 @@ spring-boot-starter-webflux + + org.springdoc + springdoc-openapi-starter-webflux-ui + 2.3.0 + + org.springframework.boot spring-boot-starter-data-r2dbc @@ -46,6 +54,11 @@ spring-boot-starter-validation + + org.springframework.boot + spring-boot-starter-security + + org.projectlombok lombok @@ -67,13 +80,13 @@ io.jsonwebtoken jjwt-api - 0.11.5 + 0.12.5 io.jsonwebtoken jjwt-impl - 0.11.5 + 0.12.5 runtime @@ -86,7 +99,7 @@ io.jsonwebtoken jjwt-jackson - 0.11.5 + 0.12.5 runtime @@ -99,19 +112,19 @@ spring-boot-starter-test test - + - io.r2dbc + org.postgresql r2dbc-postgresql - 0.8.13.RELEASE + runtime io.projectreactor reactor-test test - - + + diff --git a/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/027699cd-23cb-442b-9ce9-2ae6ad3f4e0a-lopito.png b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/027699cd-23cb-442b-9ce9-2ae6ad3f4e0a-lopito.png new file mode 100644 index 0000000..ca9e06b Binary files /dev/null and 
b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/027699cd-23cb-442b-9ce9-2ae6ad3f4e0a-lopito.png differ diff --git a/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/4d22d2b5-46b3-4c96-8077-e1015fe4e77e-teste.jpg b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/4d22d2b5-46b3-4c96-8077-e1015fe4e77e-teste.jpg new file mode 100644 index 0000000..06def72 --- /dev/null +++ b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/4d22d2b5-46b3-4c96-8077-e1015fe4e77e-teste.jpg @@ -0,0 +1 @@ +arquivo de teste diff --git a/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/747eba92-4579-454d-b9d6-b10d64e446d4-teste.jpg b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/747eba92-4579-454d-b9d6-b10d64e446d4-teste.jpg new file mode 100644 index 0000000..06def72 --- /dev/null +++ b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/747eba92-4579-454d-b9d6-b10d64e446d4-teste.jpg @@ -0,0 +1 @@ +arquivo de teste diff --git a/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/78e7b5a8-c83a-479a-b9be-8b4d99f27373-lopito.png b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/78e7b5a8-c83a-479a-b9be-8b4d99f27373-lopito.png new file mode 100644 index 0000000..ca9e06b Binary files /dev/null and b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/78e7b5a8-c83a-479a-b9be-8b4d99f27373-lopito.png differ diff --git a/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/79511022-972b-4b60-a8d4-f7f883d1bf1b-teste.jpg b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/79511022-972b-4b60-a8d4-f7f883d1bf1b-teste.jpg new file mode 100644 index 0000000..06def72 --- /dev/null 
+++ b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/79511022-972b-4b60-a8d4-f7f883d1bf1b-teste.jpg @@ -0,0 +1 @@ +arquivo de teste diff --git a/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/a6bf421e-f0b5-4c06-b4f1-dbf91a780934-teste.jpg b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/a6bf421e-f0b5-4c06-b4f1-dbf91a780934-teste.jpg new file mode 100644 index 0000000..06def72 --- /dev/null +++ b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/a6bf421e-f0b5-4c06-b4f1-dbf91a780934-teste.jpg @@ -0,0 +1 @@ +arquivo de teste diff --git a/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/c8c8f11a-1a55-4b22-b0ee-fc60882401a3-teste.jpg b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/c8c8f11a-1a55-4b22-b0ee-fc60882401a3-teste.jpg new file mode 100644 index 0000000..06def72 --- /dev/null +++ b/services/backend-api/services/backend-api/src/main/resources/static/uploads/questions/c8c8f11a-1a55-4b22-b0ee-fc60882401a3-teste.jpg @@ -0,0 +1 @@ +arquivo de teste diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/MainApplication.java b/services/backend-api/src/main/java/ao/creativemode/kixi/MainApplication.java index 2eb8700..de3db92 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/MainApplication.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/MainApplication.java @@ -1,13 +1,28 @@ package ao.creativemode.kixi; +import io.github.cdimascio.dotenv.Dotenv; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import java.util.HashMap; +import java.util.Map; + @SpringBootApplication public class MainApplication { public static void main(String[] args) { - SpringApplication.run(MainApplication.class, args); + Dotenv dotenv = 
Dotenv.configure() + .directory("./services/backend-api") + .ignoreIfMissing() + .load(); + + SpringApplication app = new SpringApplication(MainApplication.class); + + Map properties = new HashMap<>(); + dotenv.entries().forEach(entry -> properties.put(entry.getKey(), entry.getValue())); + + app.setDefaultProperties(properties); + app.run(args); } -} +} \ No newline at end of file diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/client/OcrServiceClient.java b/services/backend-api/src/main/java/ao/creativemode/kixi/client/OcrServiceClient.java new file mode 100644 index 0000000..14eee48 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/client/OcrServiceClient.java @@ -0,0 +1,294 @@ +package ao.creativemode.kixi.client; + +import ao.creativemode.kixi.dto.ocr.OcrResponse; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.buffer.DataBuffer; +import org.springframework.core.io.buffer.DataBufferUtils; +import org.springframework.core.io.buffer.DefaultDataBufferFactory; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatusCode; +import org.springframework.http.MediaType; +import org.springframework.http.client.MultipartBodyBuilder; +import org.springframework.http.codec.multipart.FilePart; +import org.springframework.stereotype.Component; +import org.springframework.web.reactive.function.BodyInserters; +import org.springframework.web.reactive.function.client.WebClient; +import org.springframework.web.reactive.function.client.WebClientRequestException; +import org.springframework.web.reactive.function.client.WebClientResponseException; + +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import reactor.util.retry.Retry; + +import java.time.Duration; +import java.util.List; + +/** + * WebClient-based client for the OCR microservice. 
+ * + * This client handles all communication with the OCR service for + * extracting text from images and PDFs. + * + * Features: + * - Reactive non-blocking HTTP calls + * - Automatic retry with exponential backoff + * - Timeout configuration + * - Error handling and mapping + */ +@Component +public class OcrServiceClient { + + private static final Logger log = LoggerFactory.getLogger(OcrServiceClient.class); + + private final WebClient webClient; + private final Duration timeout; + private final int maxRetries; + + /** + * Construct the OCR service client. + * + * @param ocrServiceUrl Base URL of the OCR service (e.g., http://ocr-service:8000) + * @param timeoutMs Request timeout in milliseconds + * @param maxRetries Maximum number of retry attempts + */ + public OcrServiceClient( + @Value("${ocr.service.url:http://localhost:8000}") String ocrServiceUrl, + @Value("${ocr.service.timeout-ms:120000}") long timeoutMs, + @Value("${ocr.service.max-retries:2}") int maxRetries) { + + this.timeout = Duration.ofMillis(timeoutMs); + this.maxRetries = maxRetries; + + this.webClient = WebClient.builder() + .baseUrl(ocrServiceUrl) + .defaultHeader(HttpHeaders.ACCEPT, MediaType.APPLICATION_JSON_VALUE) + .codecs(configurer -> configurer + .defaultCodecs() + .maxInMemorySize(50 * 1024 * 1024)) // 50MB for large images + .build(); + + log.info("OCR Service Client initialized: url={}, timeout={}ms, maxRetries={}", + ocrServiceUrl, timeoutMs, maxRetries); + } + + /** + * Extract text from uploaded file parts. 
+ * + * @param files List of uploaded file parts (images or PDFs) + * @return Mono containing the OCR response + */ + public Mono extractText(List files) { + if (files == null || files.isEmpty()) { + return Mono.error(new IllegalArgumentException("At least one file is required")); + } + + log.info("Sending OCR request: {} file(s)", files.size()); + + MultipartBodyBuilder builder = new MultipartBodyBuilder(); + + for (FilePart file : files) { + builder.asyncPart("images", file.content(), DataBuffer.class) + .filename(file.filename()) + .contentType(getContentType(file.filename())); + } + + return webClient.post() + .uri("/ocr/v1/extract") + .contentType(MediaType.MULTIPART_FORM_DATA) + .body(BodyInserters.fromMultipartData(builder.build())) + .retrieve() + .onStatus(HttpStatusCode::is4xxClientError, response -> + response.bodyToMono(String.class) + .flatMap(body -> Mono.error(new OcrClientException( + "OCR request failed: " + body, + response.statusCode().value())))) + .onStatus(HttpStatusCode::is5xxServerError, response -> + response.bodyToMono(String.class) + .flatMap(body -> Mono.error(new OcrServerException( + "OCR service error: " + body, + response.statusCode().value())))) + .bodyToMono(OcrResponse.class) + .timeout(timeout) + .retryWhen(Retry.backoff(maxRetries, Duration.ofSeconds(1)) + .filter(this::isRetryable) + .doBeforeRetry(signal -> log.warn( + "Retrying OCR request, attempt {}: {}", + signal.totalRetries() + 1, + signal.failure().getMessage()))) + .doOnSuccess(response -> log.info( + "OCR request successful: requestId={}, status={}, confidence={}", + response.requestId(), + response.status(), + response.overallConfidence())) + .doOnError(error -> log.error("OCR request failed", error)); + } + + /** + * Extract text from raw image bytes. 
+ * + * @param imageBytes Raw image bytes + * @param filename Original filename for content type detection + * @return Mono containing the OCR response + */ + public Mono extractTextFromBytes(byte[] imageBytes, String filename) { + if (imageBytes == null || imageBytes.length == 0) { + return Mono.error(new IllegalArgumentException("Image bytes cannot be empty")); + } + + log.info("Sending OCR request for single image: {} ({} bytes)", filename, imageBytes.length); + + MultipartBodyBuilder builder = new MultipartBodyBuilder(); + builder.part("images", imageBytes) + .filename(filename) + .contentType(getContentType(filename)); + + return webClient.post() + .uri("/ocr/v1/extract") + .contentType(MediaType.MULTIPART_FORM_DATA) + .body(BodyInserters.fromMultipartData(builder.build())) + .retrieve() + .onStatus(HttpStatusCode::is4xxClientError, response -> + response.bodyToMono(String.class) + .flatMap(body -> Mono.error(new OcrClientException( + "OCR request failed: " + body, + response.statusCode().value())))) + .onStatus(HttpStatusCode::is5xxServerError, response -> + response.bodyToMono(String.class) + .flatMap(body -> Mono.error(new OcrServerException( + "OCR service error: " + body, + response.statusCode().value())))) + .bodyToMono(OcrResponse.class) + .timeout(timeout) + .retryWhen(Retry.backoff(maxRetries, Duration.ofSeconds(1)) + .filter(this::isRetryable)) + .doOnSuccess(response -> log.info( + "OCR request successful: requestId={}, status={}", + response.requestId(), + response.status())) + .doOnError(error -> log.error("OCR request failed", error)); + } + + /** + * Simple health check for the OCR service. + * + * @return Mono containing true if the service is healthy + */ + public Mono healthCheck() { + return webClient.get() + .uri("/health") + .retrieve() + .bodyToMono(String.class) + .map(response -> true) + .timeout(Duration.ofSeconds(10)) + .onErrorReturn(false) + .doOnNext(healthy -> log.debug("OCR service health check: {}", healthy ? 
"OK" : "FAILED")); + } + + /** + * Get supported languages from the OCR service. + * + * @return Mono containing the languages response + */ + public Mono getSupportedLanguages() { + return webClient.get() + .uri("/ocr/v1/supported-languages") + .retrieve() + .bodyToMono(SupportedLanguagesResponse.class) + .timeout(Duration.ofSeconds(10)); + } + + /** + * Determine if an exception is retryable. + */ + private boolean isRetryable(Throwable throwable) { + // Retry on network errors and 5xx server errors + if (throwable instanceof WebClientRequestException) { + return true; + } + if (throwable instanceof OcrServerException) { + return true; + } + if (throwable instanceof WebClientResponseException ex) { + return ex.getStatusCode().is5xxServerError(); + } + return false; + } + + /** + * Get content type based on file extension. + */ + private MediaType getContentType(String filename) { + if (filename == null) { + return MediaType.APPLICATION_OCTET_STREAM; + } + + String lowerFilename = filename.toLowerCase(); + + if (lowerFilename.endsWith(".jpg") || lowerFilename.endsWith(".jpeg")) { + return MediaType.IMAGE_JPEG; + } else if (lowerFilename.endsWith(".png")) { + return MediaType.IMAGE_PNG; + } else if (lowerFilename.endsWith(".pdf")) { + return MediaType.APPLICATION_PDF; + } else if (lowerFilename.endsWith(".webp")) { + return MediaType.parseMediaType("image/webp"); + } else if (lowerFilename.endsWith(".gif")) { + return MediaType.IMAGE_GIF; + } else if (lowerFilename.endsWith(".bmp")) { + return MediaType.parseMediaType("image/bmp"); + } else if (lowerFilename.endsWith(".tiff") || lowerFilename.endsWith(".tif")) { + return MediaType.parseMediaType("image/tiff"); + } + + return MediaType.APPLICATION_OCTET_STREAM; + } + + /** + * Response for supported languages endpoint. 
+ */ + public record SupportedLanguagesResponse( + List languages, + String defaultLanguage + ) { + public record LanguageInfo( + String code, + String name, + boolean primary + ) {} + } + + /** + * Exception for client-side (4xx) errors from the OCR service. + */ + public static class OcrClientException extends RuntimeException { + private final int statusCode; + + public OcrClientException(String message, int statusCode) { + super(message); + this.statusCode = statusCode; + } + + public int getStatusCode() { + return statusCode; + } + } + + /** + * Exception for server-side (5xx) errors from the OCR service. + */ + public static class OcrServerException extends RuntimeException { + private final int statusCode; + + public OcrServerException(String message, int statusCode) { + super(message); + this.statusCode = statusCode; + } + + public int getStatusCode() { + return statusCode; + } + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/common/exception/ApiException.java b/services/backend-api/src/main/java/ao/creativemode/kixi/common/exception/ApiException.java index 0339cff..5f81cf1 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/common/exception/ApiException.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/common/exception/ApiException.java @@ -2,18 +2,38 @@ import org.springframework.http.HttpStatus; +/** + * Custom API exception with HTTP status support. + * + * Provides factory methods for common HTTP error statuses + * and allows the status to be retrieved for proper response handling. + */ public class ApiException extends RuntimeException { + private final HttpStatus status; private final String title; private final String code; + /** + * Create an API exception with status and message. 
+ * + * @param status HTTP status code + * @param message Error message + */ public ApiException(HttpStatus status, String message) { super(message); this.status = status; - this.title = null; + this.title = status.getReasonPhrase(); this.code = null; } + /** + * Create an API exception with status, title, and message. + * + * @param status HTTP status code + * @param title Error title + * @param message Error message + */ public ApiException(HttpStatus status, String title, String message) { super(message); this.status = status; @@ -21,19 +41,266 @@ public ApiException(HttpStatus status, String title, String message) { this.code = null; } + /** + * Create an API exception with status, title, message, and code. + * + * @param status HTTP status code + * @param title Error title + * @param message Error message + * @param code Error code for programmatic handling + */ + public ApiException( + HttpStatus status, + String title, + String message, + String code + ) { + super(message); + this.status = status; + this.title = title; + this.code = code; + } + + /** + * Get the HTTP status code. + * + * @return HTTP status + */ public HttpStatus getStatus() { return status; } - public static ApiException notFound(String message) { - return new ApiException(HttpStatus.NOT_FOUND, "Not Found", message); + /** + * Get the HTTP status code as integer. + * + * @return HTTP status code value + */ + public int getStatusCode() { + return status.value(); } + /** + * Get the error title. + * + * @return Error title + */ + public String getTitle() { + return title; + } + + /** + * Get the error code. + * + * @return Error code or null if not set + */ + public String getCode() { + return code; + } + + // ========================================================================= + // Factory methods for common HTTP errors + // ========================================================================= + + /** + * Create a 400 Bad Request exception. 
+ * + * @param message Error message + * @return ApiException with BAD_REQUEST status + */ public static ApiException badRequest(String message) { return new ApiException(HttpStatus.BAD_REQUEST, "Bad Request", message); } + /** + * Create a 400 Bad Request exception with custom title. + * + * @param title Error title + * @param message Error message + * @return ApiException with BAD_REQUEST status + */ + public static ApiException badRequest(String title, String message) { + return new ApiException(HttpStatus.BAD_REQUEST, title, message); + } + + /** + * Create a 401 Unauthorized exception. + * + * @param message Error message + * @return ApiException with UNAUTHORIZED status + */ + public static ApiException unauthorized(String message) { + return new ApiException( + HttpStatus.UNAUTHORIZED, + "Unauthorized", + message + ); + } + + /** + * Create a 403 Forbidden exception. + * + * @param message Error message + * @return ApiException with FORBIDDEN status + */ + public static ApiException forbidden(String message) { + return new ApiException(HttpStatus.FORBIDDEN, "Forbidden", message); + } + + /** + * Create a 404 Not Found exception. + * + * @param message Error message + * @return ApiException with NOT_FOUND status + */ + public static ApiException notFound(String message) { + return new ApiException(HttpStatus.NOT_FOUND, "Not Found", message); + } + + /** + * Create a 404 Not Found exception for a specific resource. + * + * @param resourceName Name of the resource (e.g., "Statement", "Question") + * @param resourceId ID of the resource + * @return ApiException with NOT_FOUND status + */ + public static ApiException notFound( + String resourceName, + Object resourceId + ) { + return new ApiException( + HttpStatus.NOT_FOUND, + "Not Found", + String.format("%s with ID %s not found", resourceName, resourceId) + ); + } + + /** + * Create a 409 Conflict exception. 
+ * + * @param message Error message + * @return ApiException with CONFLICT status + */ public static ApiException conflict(String message) { return new ApiException(HttpStatus.CONFLICT, "Conflict", message); } -} \ No newline at end of file + + /** + * Create a 409 Conflict exception for duplicate resource. + * + * @param resourceName Name of the resource + * @param field Field that caused the conflict + * @param value Value that already exists + * @return ApiException with CONFLICT status + */ + public static ApiException duplicate( + String resourceName, + String field, + Object value + ) { + return new ApiException( + HttpStatus.CONFLICT, + "Duplicate Resource", + String.format( + "%s with %s '%s' already exists", + resourceName, + field, + value + ) + ); + } + + /** + * Create a 422 Unprocessable Entity exception. + * + * @param message Error message + * @return ApiException with UNPROCESSABLE_ENTITY status + */ + public static ApiException unprocessableEntity(String message) { + return new ApiException( + HttpStatus.UNPROCESSABLE_ENTITY, + "Unprocessable Entity", + message + ); + } + + /** + * Create a 500 Internal Server Error exception. + * + * @param message Error message + * @return ApiException with INTERNAL_SERVER_ERROR status + */ + public static ApiException internalError(String message) { + return new ApiException( + HttpStatus.INTERNAL_SERVER_ERROR, + "Internal Server Error", + message + ); + } + + /** + * Create a 500 Internal Server Error exception with cause. + * + * @param message Error message + * @param cause Original exception + * @return ApiException with INTERNAL_SERVER_ERROR status + */ + public static ApiException internalError(String message, Throwable cause) { + ApiException exception = new ApiException( + HttpStatus.INTERNAL_SERVER_ERROR, + "Internal Server Error", + message + ); + exception.initCause(cause); + return exception; + } + + /** + * Create a 502 Bad Gateway exception. 
+ * + * @param message Error message + * @return ApiException with BAD_GATEWAY status + */ + public static ApiException badGateway(String message) { + return new ApiException(HttpStatus.BAD_GATEWAY, "Bad Gateway", message); + } + + /** + * Create a 503 Service Unavailable exception. + * + * @param message Error message + * @return ApiException with SERVICE_UNAVAILABLE status + */ + public static ApiException serviceUnavailable(String message) { + return new ApiException( + HttpStatus.SERVICE_UNAVAILABLE, + "Service Unavailable", + message + ); + } + + /** + * Create a 504 Gateway Timeout exception. + * + * @param message Error message + * @return ApiException with GATEWAY_TIMEOUT status + */ + public static ApiException gatewayTimeout(String message) { + return new ApiException( + HttpStatus.GATEWAY_TIMEOUT, + "Gateway Timeout", + message + ); + } + + @Override + public String toString() { + return String.format( + "ApiException{status=%d %s, title='%s', message='%s', code='%s'}", + status.value(), + status.getReasonPhrase(), + title, + getMessage(), + code + ); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/common/exception/GlobalExceptionHandler.java b/services/backend-api/src/main/java/ao/creativemode/kixi/common/exception/GlobalExceptionHandler.java index d6d1e6f..0603479 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/common/exception/GlobalExceptionHandler.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/common/exception/GlobalExceptionHandler.java @@ -1,18 +1,23 @@ package ao.creativemode.kixi.common.exception; -import ao.creativemode.kixi.common.dto.ProblemDetail; +import java.net.URI; +import java.util.Map; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import 
org.springframework.web.bind.annotation.ControllerAdvice; import org.springframework.web.bind.annotation.ExceptionHandler; import org.springframework.web.bind.support.WebExchangeBindException; import org.springframework.web.server.ServerWebExchange; -import reactor.core.publisher.Mono; -import java.net.URI; -import java.util.Map; -import java.util.stream.Collectors; +import ao.creativemode.kixi.client.OcrServiceClient.OcrClientException; +import ao.creativemode.kixi.client.OcrServiceClient.OcrServerException; +import ao.creativemode.kixi.common.dto.ProblemDetail; +import reactor.core.publisher.Mono; /** * Global exception handler for the API. @@ -21,81 +26,224 @@ @ControllerAdvice public class GlobalExceptionHandler { - private static final Logger log = LoggerFactory.getLogger(GlobalExceptionHandler.class); + private static final Logger log = LoggerFactory.getLogger( + GlobalExceptionHandler.class + ); - private static final URI DEFAULT_TYPE = URI.create("https://api.kixi.com/errors"); + private static final URI DEFAULT_TYPE = URI.create( + "https://api.kixi.com/errors" + ); + // Adiciona URI para erros OCR + private static final URI OCR_ERROR_TYPE = URI.create("https://api.kixi.ao/errors/ocr-error"); + /** + * Handle custom API exceptions with proper status codes. + */ @ExceptionHandler(ApiException.class) public Mono> handleApiException( - ApiException ex, - ServerWebExchange exchange) { + ApiException ex, + ServerWebExchange exchange + ) { + HttpStatus status = + ex.getStatus() != null + ? ex.getStatus() + : HttpStatus.INTERNAL_SERVER_ERROR; + int statusCode = status.value(); ProblemDetail problem = ProblemDetail.forStatusAndDetail( - 500, - ex.getMessage() != null ? ex.getMessage() : "API Error occurred").withTitle("API Error"); + statusCode, + ex.getMessage() != null ? 
ex.getMessage() : "API Error occurred" + ).withTitle(status.getReasonPhrase()); problem = addInstance(exchange, problem); - return Mono.just(ResponseEntity.status(500).body(problem)); + + return Mono.just(ResponseEntity.status(statusCode).body(problem)); } + /** + * Handle validation errors from request body binding. + */ @ExceptionHandler(WebExchangeBindException.class) public Mono> handleValidationErrors( - WebExchangeBindException ex, - ServerWebExchange exchange) { - - Map fieldErrors = ex.getFieldErrors().stream() - .collect(Collectors.toMap( - fieldError -> fieldError.getField(), - fieldError -> { - String msg = fieldError.getDefaultMessage() != null - ? fieldError.getDefaultMessage() - : "Invalid value"; - if (fieldError.getRejectedValue() != null) { - return Map.of( - "message", msg, - "rejectedValue", fieldError.getRejectedValue()); - } - return msg; - })); + WebExchangeBindException ex, + ServerWebExchange exchange + ) { + Map fieldErrors = ex + .getFieldErrors() + .stream() + .collect( + Collectors.toMap( + fieldError -> fieldError.getField(), + fieldError -> { + String msg = + fieldError.getDefaultMessage() != null + ? fieldError.getDefaultMessage() + : "Invalid value"; + if (fieldError.getRejectedValue() != null) { + return Map.of( + "message", + msg, + "rejectedValue", + fieldError.getRejectedValue() + ); + } + return msg; + } + ) + ); ProblemDetail problem = ProblemDetail.validationError( - "Validation failed for one or more fields", - fieldErrors); + "Validation failed for one or more fields", + fieldErrors + ); problem = addInstance(exchange, problem); return Mono.just(ResponseEntity.badRequest().body(problem)); } + /** + * Handle OCR client exceptions (4xx errors from OCR service). 
+ */ + @ExceptionHandler(OcrClientException.class) + public Mono> handleOcrClientException( + OcrClientException ex, + ServerWebExchange exchange + ) { + log.warn( + "OCR client error: status={}, message={}", + ex.getStatusCode(), + ex.getMessage() + ); + + ProblemDetail problem = new ProblemDetail( + OCR_ERROR_TYPE, + "OCR Processing Error", + ex.getStatusCode(), + ex.getMessage(), + Map.of("service", "ocr-service", "errorType", "client_error") + ); + + problem = addInstance(exchange, problem); + + return Mono.just( + ResponseEntity.status(ex.getStatusCode()).body(problem) + ); + } + + /** + * Handle OCR server exceptions (5xx errors from OCR service). + */ + @ExceptionHandler(OcrServerException.class) + public Mono> handleOcrServerException( + OcrServerException ex, + ServerWebExchange exchange + ) { + log.error( + "OCR server error: status={}, message={}", + ex.getStatusCode(), + ex.getMessage() + ); + + ProblemDetail problem = new ProblemDetail( + OCR_ERROR_TYPE, + "OCR Service Unavailable", + HttpStatus.SERVICE_UNAVAILABLE.value(), + "The OCR service is temporarily unavailable. Please try again later.", + Map.of("service", "ocr-service", "errorType", "server_error") + ); + + problem = addInstance(exchange, problem); + + return Mono.just( + ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).body(problem) + ); + } + + /** + * Handle timeout exceptions from OCR service calls. + */ + @ExceptionHandler(TimeoutException.class) + public Mono> handleTimeoutException( + TimeoutException ex, + ServerWebExchange exchange + ) { + log.error("Request timeout: {}", ex.getMessage()); + + ProblemDetail problem = new ProblemDetail( + URI.create("https://api.kixi.ao/errors/timeout"), + "Request Timeout", + HttpStatus.GATEWAY_TIMEOUT.value(), + "The request took too long to process. 
Please try again with a smaller file or fewer images.", + Map.of("errorType", "timeout") + ); + + problem = addInstance(exchange, problem); + + return Mono.just( + ResponseEntity.status(HttpStatus.GATEWAY_TIMEOUT).body(problem) + ); + } + + /** + * Handle illegal argument exceptions (bad requests). + */ + @ExceptionHandler(IllegalArgumentException.class) + public Mono> handleIllegalArgumentException( + IllegalArgumentException ex, + ServerWebExchange exchange + ) { + log.warn("Illegal argument: {}", ex.getMessage()); + + ProblemDetail problem = ProblemDetail.forStatusAndDetail( + HttpStatus.BAD_REQUEST.value(), + ex.getMessage() != null ? ex.getMessage() : "Invalid request" + ).withTitle("Bad Request"); + + problem = addInstance(exchange, problem); + + return Mono.just(ResponseEntity.badRequest().body(problem)); + } + + /** + * Handle all other uncaught exceptions. + */ @ExceptionHandler(Exception.class) public Mono> handleGenericException( - Exception ex, - ServerWebExchange exchange) { - + Exception ex, + ServerWebExchange exchange + ) { log.error("Unhandled exception occurred", ex); ProblemDetail problem = ProblemDetail.forStatusAndDetail( - 500, - "An unexpected error occurred on the server. Please try again later.") - .withTitle("Internal Server Error"); + 500, + "An unexpected error occurred on the server. Please try again later." + ).withTitle("Internal Server Error"); problem = addInstance(exchange, problem); + return Mono.just(ResponseEntity.internalServerError().body(problem)); } /** - * * Adds the 'instance' field with the URI of the current request (RFC 9457 * recommended) */ - private ProblemDetail addInstance(ServerWebExchange exchange, ProblemDetail problem) { + private ProblemDetail addInstance( + ServerWebExchange exchange, + ProblemDetail problem + ) { String requestUri = exchange.getRequest().getURI().toString(); - Map currentProps = problem.properties() != null ? 
problem.properties() : Map.of(); - Map updatedProps = new java.util.HashMap<>(currentProps); + Map currentProps = + problem.properties() != null ? problem.properties() : Map.of(); + Map updatedProps = new java.util.HashMap<>( + currentProps + ); updatedProps.put("instance", requestUri); return new ProblemDetail( - problem.type(), - problem.title(), - problem.status(), - problem.detail(), - updatedProps); + problem.type(), + problem.title(), + problem.status(), + problem.detail(), + updatedProps + ); } -} \ No newline at end of file +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/config/GoogleOAuth2Properties.java b/services/backend-api/src/main/java/ao/creativemode/kixi/config/GoogleOAuth2Properties.java new file mode 100644 index 0000000..ab89784 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/config/GoogleOAuth2Properties.java @@ -0,0 +1,81 @@ +package ao.creativemode.kixi.config; + +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +@Component +@ConfigurationProperties(prefix = "app.auth.google") +public class GoogleOAuth2Properties { + + private String clientId; + private String clientSecret; + private String redirectUri = "http://localhost:8080/api/v1/auth/google/callback"; + private String authorizationUri = "https://accounts.google.com/o/oauth2/v2/auth"; + private String tokenUri = "https://oauth2.googleapis.com/token"; + private String userInfoUri = "https://www.googleapis.com/oauth2/v2/userinfo"; + private String scope = "openid email profile"; + + public String getClientId() { + return clientId; + } + + public void setClientId(String clientId) { + this.clientId = clientId; + } + + public String getClientSecret() { + return clientSecret; + } + + public void setClientSecret(String clientSecret) { + this.clientSecret = clientSecret; + } + + public String getRedirectUri() { + return redirectUri; + } + + public void 
setRedirectUri(String redirectUri) { + this.redirectUri = redirectUri; + } + + public String getAuthorizationUri() { + return authorizationUri; + } + + public void setAuthorizationUri(String authorizationUri) { + this.authorizationUri = authorizationUri; + } + + public String getTokenUri() { + return tokenUri; + } + + public void setTokenUri(String tokenUri) { + this.tokenUri = tokenUri; + } + + public String getUserInfoUri() { + return userInfoUri; + } + + public void setUserInfoUri(String userInfoUri) { + this.userInfoUri = userInfoUri; + } + + public String getScope() { + return scope; + } + + public void setScope(String scope) { + this.scope = scope; + } + + public String buildAuthorizationUrl(String state) { + return authorizationUri + "?client_id=" + clientId + + "&redirect_uri=" + java.net.URLEncoder.encode(redirectUri, java.nio.charset.StandardCharsets.UTF_8) + + "&response_type=code" + + "&scope=" + java.net.URLEncoder.encode(scope, java.nio.charset.StandardCharsets.UTF_8) + + (state != null ? 
"&state=" + state : ""); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/config/GroqConfig.java b/services/backend-api/src/main/java/ao/creativemode/kixi/config/GroqConfig.java new file mode 100644 index 0000000..3665ff5 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/config/GroqConfig.java @@ -0,0 +1,25 @@ +package ao.creativemode.kixi.config; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.reactive.function.client.WebClient; + +@Configuration +public class GroqConfig { + + @Value("${groq.api.key:}") + private String apiKey; + + @Value("${groq.api.url:https://api.groq.com/openai/v1}") + private String apiUrl; + + @Bean + public WebClient groqWebClient() { + return WebClient.builder() + .baseUrl(apiUrl) + .defaultHeader("Authorization", "Bearer " + apiKey) + .defaultHeader("Content-Type", "application/json") + .build(); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/config/JwtProperties.java b/services/backend-api/src/main/java/ao/creativemode/kixi/config/JwtProperties.java new file mode 100644 index 0000000..fa1c1fe --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/config/JwtProperties.java @@ -0,0 +1,28 @@ +package ao.creativemode.kixi.config; + +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +@Component +@ConfigurationProperties(prefix = "app.jwt") +public class JwtProperties { + + private String secret = "default-secret-change-in-production-min-256-bits"; + private long expirationMs = 86400000L; // 24 hours + + public String getSecret() { + return secret; + } + + public void setSecret(String secret) { + this.secret = secret; + } + + public long getExpirationMs() { + return expirationMs; + } + + public void 
setExpirationMs(long expirationMs) { + this.expirationMs = expirationMs; + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/config/SecurityConfig.java b/services/backend-api/src/main/java/ao/creativemode/kixi/config/SecurityConfig.java new file mode 100644 index 0000000..756d769 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/config/SecurityConfig.java @@ -0,0 +1,55 @@ +package ao.creativemode.kixi.config; + +import ao.creativemode.kixi.security.JwtAuthenticationFilter; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.http.HttpStatus; +import org.springframework.security.config.annotation.web.reactive.EnableWebFluxSecurity; +import org.springframework.security.config.web.server.ServerHttpSecurity; +import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder; +import org.springframework.security.crypto.password.PasswordEncoder; +import org.springframework.security.web.server.SecurityWebFilterChain; +import org.springframework.security.web.server.context.NoOpServerSecurityContextRepository; +import org.springframework.security.config.web.server.SecurityWebFiltersOrder; + +@Configuration +@EnableWebFluxSecurity +public class SecurityConfig { + + private final JwtAuthenticationFilter jwtAuthenticationFilter; + + public SecurityConfig(JwtAuthenticationFilter jwtAuthenticationFilter) { + this.jwtAuthenticationFilter = jwtAuthenticationFilter; + } + + @Bean + public SecurityWebFilterChain securityWebFilterChain(ServerHttpSecurity http) { + return http + .csrf(ServerHttpSecurity.CsrfSpec::disable) + .httpBasic(ServerHttpSecurity.HttpBasicSpec::disable) + .formLogin(ServerHttpSecurity.FormLoginSpec::disable) + .securityContextRepository(NoOpServerSecurityContextRepository.getInstance()) + .authorizeExchange(exchange -> exchange + .pathMatchers("/api/v1/auth/**").permitAll() + 
.pathMatchers("/actuator/health").permitAll() + .anyExchange().authenticated() + ) + .exceptionHandling(handling -> handling + .authenticationEntryPoint((exchange, ex) -> { + exchange.getResponse().setStatusCode(HttpStatus.UNAUTHORIZED); + return exchange.getResponse().setComplete(); + }) + .accessDeniedHandler((exchange, denied) -> { + exchange.getResponse().setStatusCode(HttpStatus.FORBIDDEN); + return exchange.getResponse().setComplete(); + }) + ) + .addFilterAt(jwtAuthenticationFilter, SecurityWebFiltersOrder.AUTHENTICATION) + .build(); + } + + @Bean + public PasswordEncoder passwordEncoder() { + return new BCryptPasswordEncoder(); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/AuthController.java b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/AuthController.java new file mode 100644 index 0000000..a8be181 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/AuthController.java @@ -0,0 +1,63 @@ +package ao.creativemode.kixi.controller; + +import ao.creativemode.kixi.config.GoogleOAuth2Properties; +import ao.creativemode.kixi.dto.auth.LoginRequest; +import ao.creativemode.kixi.dto.auth.LoginResponse; +import ao.creativemode.kixi.service.AuthService; +import jakarta.validation.Valid; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.server.ServerWebExchange; +import reactor.core.publisher.Mono; + +import java.net.URI; + +/** + * Autenticação: login tradicional (username/email + password) e login Google OAuth2. 
+ */ +@RestController +@RequestMapping("/api/v1/auth") +public class AuthController { + + private final AuthService authService; + private final GoogleOAuth2Properties googleProperties; + + public AuthController(AuthService authService, GoogleOAuth2Properties googleProperties) { + this.authService = authService; + this.googleProperties = googleProperties; + } + + /** + * Login tradicional: username ou email + password. + * Devolve JWT e roles para uso em Authorization: Bearer <token>. + */ + @PostMapping("/login") + public Mono> login(@Valid @RequestBody LoginRequest request) { + return authService.login(request.usernameOrEmail(), request.password()) + .map(ResponseEntity::ok); + } + + /** + * Redireciona o utilizador para o consentimento Google OAuth2. + * Só funciona se app.auth.google.client-id estiver configurado. + */ + @GetMapping("/google") + public Mono> googleRedirect(ServerWebExchange exchange) { + if (googleProperties.getClientId() == null || googleProperties.getClientId().isBlank()) { + return Mono.just(ResponseEntity.badRequest().build()); + } + String state = java.util.UUID.randomUUID().toString(); + String url = googleProperties.buildAuthorizationUrl(state); + return Mono.just(ResponseEntity.status(302).location(URI.create(url)).build()); + } + + /** + * Callback do Google: troca o code por token, obtém userinfo, encontra/cria account, emite JWT. + * Resposta JSON com accessToken e roles (para uso em Authorization: Bearer <token>). 
+ */ + @GetMapping("/google/callback") + public Mono> googleCallback(@RequestParam String code) { + return authService.loginWithGoogle(code) + .map(ResponseEntity::ok); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/ChatController.java b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/ChatController.java new file mode 100644 index 0000000..91f0073 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/ChatController.java @@ -0,0 +1,21 @@ +package ao.creativemode.kixi.controller; + +import ao.creativemode.kixi.dto.ChatRequestDto; +import ao.creativemode.kixi.dto.ChatResponseDto; +import ao.creativemode.kixi.service.ChatService; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.*; +import reactor.core.publisher.Mono; + +@RestController +@RequestMapping("/api/chat") +@RequiredArgsConstructor +public class ChatController { + + private final ChatService chatService; + + @PostMapping + public Mono chat(@RequestBody ChatRequestDto request) { + return chatService.chat(request); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/OcrController.java b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/OcrController.java new file mode 100644 index 0000000..a2e6c85 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/OcrController.java @@ -0,0 +1,609 @@ +package ao.creativemode.kixi.controller; + +import ao.creativemode.kixi.client.OcrServiceClient; +import ao.creativemode.kixi.common.exception.ApiException; +import ao.creativemode.kixi.dto.ocr.ExamExtractionResponse; +import ao.creativemode.kixi.dto.ocr.OcrResponse; +import ao.creativemode.kixi.service.OcrPersistenceService; +import ao.creativemode.kixi.service.OcrPersistenceService.StatementWithRelations; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.http.codec.multipart.FilePart; +import org.springframework.web.bind.annotation.*; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +/** + * REST Controller for OCR operations. + * + * Provides endpoints for extracting text from images and PDFs + * using the OCR microservice, and persisting the results. + * + * Endpoints: + * - POST /api/v1/ocr/extract - Extract text from uploaded images (OCR only) + * - POST /api/v1/ocr/extract-and-persist - Extract and persist to database + * - GET /api/v1/ocr/health - Check OCR service health + * - GET /api/v1/ocr/languages - Get supported OCR languages + */ +@RestController +@RequestMapping("/api/v1/ocr") +public class OcrController { + + private static final Logger log = LoggerFactory.getLogger( + OcrController.class + ); + + private static final Set ALLOWED_EXTENSIONS = Set.of( + ".jpg", + ".jpeg", + ".png", + ".pdf", + ".webp", + ".bmp", + ".tiff", + ".tif" + ); + + private static final long MAX_FILE_SIZE = 20 * 1024 * 1024; // 20MB + private static final int MAX_FILES = 10; + + private final OcrServiceClient ocrServiceClient; + private final OcrPersistenceService ocrPersistenceService; + + public OcrController( + OcrServiceClient ocrServiceClient, + OcrPersistenceService ocrPersistenceService + ) { + this.ocrServiceClient = ocrServiceClient; + this.ocrPersistenceService = ocrPersistenceService; + } + + /** + * Extract text from uploaded images/PDFs (OCR only, no persistence). 
+ * + * @param files List of uploaded file parts (images or PDFs) + * @return OCR extraction result with structured data + */ + @PostMapping( + value = "/extract", + consumes = MediaType.MULTIPART_FORM_DATA_VALUE + ) + public Mono> extractText( + @RequestPart("files") Flux files + ) { + log.info("OCR extraction request received"); + + return files + .collectList() + .flatMap(fileList -> { + // Validate file count + if (fileList.isEmpty()) { + return Mono.error( + ApiException.badRequest("At least one file is required") + ); + } + if (fileList.size() > MAX_FILES) { + return Mono.error( + ApiException.badRequest( + "Maximum " + + MAX_FILES + + " files allowed per request" + ) + ); + } + + // Validate file types + for (FilePart file : fileList) { + if (!isAllowedFileType(file.filename())) { + return Mono.error( + ApiException.badRequest( + "Invalid file type: " + + file.filename() + + ". Allowed: " + + String.join(", ", ALLOWED_EXTENSIONS) + ) + ); + } + } + + log.info( + "Processing {} file(s) for OCR extraction", + fileList.size() + ); + + // Call OCR service + return ocrServiceClient.extractText(fileList); + }) + .map(ocrResponse -> { + // Return appropriate status based on OCR result + if (ocrResponse.isSuccess()) { + return ResponseEntity.ok(ocrResponse); + } else if (ocrResponse.isPartial()) { + return ResponseEntity.status(HttpStatus.MULTI_STATUS).body( + ocrResponse + ); + } else { + return ResponseEntity.status( + HttpStatus.UNPROCESSABLE_ENTITY + ).body(ocrResponse); + } + }) + .doOnSuccess(response -> + log.info( + "OCR extraction completed: status={}", + response.getStatusCode() + ) + ) + .doOnError(error -> log.error("OCR extraction failed", error)); + } + + /** + * Extract text from a single uploaded image/PDF (OCR only). 
+ * + * @param file Single uploaded file part + * @return OCR extraction result with structured data + */ + @PostMapping( + value = "/extract/single", + consumes = MediaType.MULTIPART_FORM_DATA_VALUE + ) + public Mono> extractTextSingle( + @RequestPart("file") FilePart file + ) { + log.info( + "Single-file OCR extraction request received: {}", + file.filename() + ); + + // Validate file type + if (!isAllowedFileType(file.filename())) { + return Mono.error( + ApiException.badRequest( + "Invalid file type: " + + file.filename() + + ". Allowed: " + + String.join(", ", ALLOWED_EXTENSIONS) + ) + ); + } + + return ocrServiceClient + .extractText(List.of(file)) + .map(ocrResponse -> { + if (ocrResponse.isSuccess()) { + return ResponseEntity.ok(ocrResponse); + } else if (ocrResponse.isPartial()) { + return ResponseEntity.status(HttpStatus.MULTI_STATUS).body( + ocrResponse + ); + } else { + return ResponseEntity.status( + HttpStatus.UNPROCESSABLE_ENTITY + ).body(ocrResponse); + } + }) + .doOnSuccess(response -> + log.info( + "Single-file OCR extraction completed: status={}", + response.getStatusCode() + ) + ) + .doOnError(error -> + log.error("Single-file OCR extraction failed", error) + ); + } + + /** + * Extract exam data in structured Angolan format. 
+ * + * Returns the extraction result in the exact JSON structure required + * for Angolan exam papers (12ª classe), including: + * - exam_type, duration_minutes, variant, title, instructions + * - school_year_start, school_year_end, class_grade, course_name, subject_name + * - total_max_score + * - questions with number, subitems, text, type, cotacao, options, has_image + * - images_to_upload with suggested_filename, description, region + * + * @param files List of uploaded file parts (images or PDFs) + * @return Structured exam extraction result + */ + @PostMapping( + value = "/extract/exam", + consumes = MediaType.MULTIPART_FORM_DATA_VALUE + ) + public Mono> extractExam( + @RequestPart("files") Flux files + ) { + log.info("Angolan exam extraction request received"); + + return files + .collectList() + .flatMap(fileList -> { + // Validate file count + if (fileList.isEmpty()) { + return Mono.error( + ApiException.badRequest("At least one file is required") + ); + } + if (fileList.size() > MAX_FILES) { + return Mono.error( + ApiException.badRequest( + "Maximum " + + MAX_FILES + + " files allowed per request" + ) + ); + } + + // Validate file types + for (FilePart file : fileList) { + if (!isAllowedFileType(file.filename())) { + return Mono.error( + ApiException.badRequest( + "Invalid file type: " + + file.filename() + + ". 
Allowed: " + + String.join(", ", ALLOWED_EXTENSIONS) + ) + ); + } + } + + log.info( + "Processing {} file(s) for Angolan exam extraction", + fileList.size() + ); + + // Call OCR service + return ocrServiceClient.extractText(fileList); + }) + .map(ocrResponse -> { + // Convert to structured exam format + ExamExtractionResponse examResponse = + ExamExtractionResponse.fromOcrResponse(ocrResponse); + + if (ocrResponse.isSuccess()) { + return ResponseEntity.ok(examResponse); + } else if (ocrResponse.isPartial()) { + return ResponseEntity.status(HttpStatus.MULTI_STATUS).body( + examResponse + ); + } else { + return ResponseEntity.status( + HttpStatus.UNPROCESSABLE_ENTITY + ).body(examResponse); + } + }) + .doOnSuccess(response -> + log.info( + "Angolan exam extraction completed: status={}", + response.getStatusCode() + ) + ) + .doOnError(error -> + log.error("Angolan exam extraction failed", error) + ); + } + + /** + * Extract text from uploaded images/PDFs and persist to database. + * + * This endpoint performs full OCR extraction and creates/updates: + * - SchoolYear (lookup or create by start_year/end_year) + * - Course (lookup or create by name) + * - Subject (lookup or create by name) + * - Class (lookup or create by grade/course/school_year) + * - Statement with Questions and Options + * + * @param files List of uploaded file parts (images or PDFs) + * @param createdBy Optional user ID who is creating the statement + * @return Created statement with all related entities + */ + @PostMapping( + value = "/extract-and-persist", + consumes = MediaType.MULTIPART_FORM_DATA_VALUE + ) + public Mono< + ResponseEntity + > extractAndPersist( + @RequestPart("files") Flux files, + @RequestParam(value = "createdBy", required = false) Long createdBy + ) { + log.info( + "OCR extraction and persistence request received, createdBy={}", + createdBy + ); + + return files + .collectList() + .flatMap(fileList -> { + // Validate file count + if (fileList.isEmpty()) { + return Mono.error( 
+ ApiException.badRequest("At least one file is required") + ); + } + if (fileList.size() > MAX_FILES) { + return Mono.error( + ApiException.badRequest( + "Maximum " + + MAX_FILES + + " files allowed per request" + ) + ); + } + + // Validate file types + for (FilePart file : fileList) { + if (!isAllowedFileType(file.filename())) { + return Mono.error( + ApiException.badRequest( + "Invalid file type: " + + file.filename() + + ". Allowed: " + + String.join(", ", ALLOWED_EXTENSIONS) + ) + ); + } + } + + log.info( + "Processing {} file(s) for OCR extraction and persistence", + fileList.size() + ); + + // Process and persist + return ocrPersistenceService.processAndPersist( + fileList, + createdBy + ); + }) + .map(result -> { + StatementWithRelationsResponse response = + new StatementWithRelationsResponse( + result.statement().getId(), + result.statement().getTitle(), + result.statement().getExamType(), + result.statement().getVariant(), + result.statement().getDurationMinutes(), + result.statement().getTotalMaxScore(), + result.statement().getOcrConfidence(), + result.statement().getNeedsReview(), + result.schoolYear() != null + ? new SchoolYearInfo( + result.schoolYear().getId(), + result.schoolYear().getStartYear(), + result.schoolYear().getEndYear() + ) + : null, + result.course() != null + ? new EntityInfo( + result.course().getId(), + result.course().getName() + ) + : null, + result.subject() != null + ? new EntityInfo( + result.subject().getId(), + result.subject().getName() + ) + : null, + result.classEntity() != null + ? new ClassInfo( + result.classEntity().getId(), + result.classEntity().getGrade(), + result.classEntity().getCode() + ) + : null, + result.questions() != null + ? result.questions().size() + : 0, + result.imagesToUpload() != null + ? 
result + .imagesToUpload() + .stream() + .map(img -> + new ImageToUploadInfo( + img.suggestedFilename(), + img.description(), + img.region() + ) + ) + .toList() + : List.of() + ); + return ResponseEntity.status(HttpStatus.CREATED).body(response); + }) + .doOnSuccess(response -> + log.info( + "OCR extraction and persistence completed: statementId={}", + response.getBody() != null + ? response.getBody().statementId() + : "null" + ) + ) + .doOnError(error -> + log.error("OCR extraction and persistence failed", error) + ); + } + + /** + * Check OCR service health. + * + * @return Health status of the OCR service + */ + @GetMapping("/health") + public Mono>> checkHealth() { + return ocrServiceClient + .healthCheck() + .map(healthy -> { + Map response = Map.of( + "service", + "ocr-service", + "status", + healthy ? "healthy" : "unhealthy", + "available", + healthy + ); + return healthy + ? ResponseEntity.ok(response) + : ResponseEntity.status( + HttpStatus.SERVICE_UNAVAILABLE + ).body(response); + }) + .onErrorResume(error -> { + log.error("OCR health check failed", error); + Map response = Map.of( + "service", + "ocr-service", + "status", + "unavailable", + "available", + false, + "error", + error.getMessage() + ); + return Mono.just( + ResponseEntity.status(HttpStatus.SERVICE_UNAVAILABLE).body( + response + ) + ); + }); + } + + /** + * Get supported OCR languages. + * + * @return List of supported languages + */ + @GetMapping("/languages") + public Mono< + ResponseEntity + > getSupportedLanguages() { + return ocrServiceClient + .getSupportedLanguages() + .map(ResponseEntity::ok) + .onErrorResume(error -> { + log.error("Failed to get supported languages", error); + return Mono.error( + ApiException.badRequest( + "Failed to retrieve supported languages" + ) + ); + }); + } + + /** + * Get OCR service information. 
+ * + * @return Information about the OCR service capabilities + */ + @GetMapping("/info") + public Mono>> getInfo() { + return Mono.just( + ResponseEntity.ok( + Map.of( + "service", + "OCR Service", + "version", + "1.0.0", + "supportedFormats", + List.of("JPEG", "PNG", "PDF", "WebP", "BMP", "TIFF"), + "maxFileSize", + MAX_FILE_SIZE, + "maxFiles", + MAX_FILES, + "features", + List.of( + "Text extraction from images", + "PDF multi-page support", + "Question detection and segmentation", + "Multiple choice option extraction", + "Metadata extraction (school year, subject, etc.)", + "Confidence scores for all extracted data", + "Portuguese language optimization", + "Angolan exam format support (12ª classe)", + "Structured exam extraction (exam_type, cotacao, subitems)", + "Automatic entity creation (SchoolYear, Course, Subject, Class)", + "Image region detection (cabecalho, questoes, rodape/coordenacao)" + ), + "endpoints", + Map.of( + "extract", + "POST /api/v1/ocr/extract - Raw OCR extraction", + "extractExam", + "POST /api/v1/ocr/extract/exam - Structured Angolan exam format", + "extractAndPersist", + "POST /api/v1/ocr/extract-and-persist - Extract and save to database" + ) + ) + ) + ); + } + + /** + * Validate file extension. + */ + private boolean isAllowedFileType(String filename) { + if (filename == null || filename.isBlank()) { + return false; + } + + String lowerFilename = filename.toLowerCase(); + return ALLOWED_EXTENSIONS.stream().anyMatch(lowerFilename::endsWith); + } + + // ========================================================================= + // Response DTOs + // ========================================================================= + + /** + * Response DTO for extract-and-persist endpoint. 
+ */ + public record StatementWithRelationsResponse( + Long statementId, + String title, + String examType, + String variant, + Integer durationMinutes, + Double totalMaxScore, + Double ocrConfidence, + Boolean needsReview, + SchoolYearInfo schoolYear, + EntityInfo course, + EntityInfo subject, + ClassInfo classInfo, + Integer questionCount, + List imagesToUpload + ) {} + + /** + * School year info DTO. + */ + public record SchoolYearInfo(Long id, Integer startYear, Integer endYear) {} + + /** + * Generic entity info DTO. + */ + public record EntityInfo(Long id, String name) {} + + /** + * Class info DTO. + */ + public record ClassInfo(Long id, Integer grade, String code) {} + + /** + * Image to upload info DTO. + */ + public record ImageToUploadInfo( + String suggestedFilename, + String description, + String region + ) {} +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/QuestionImageController.java b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/QuestionImageController.java new file mode 100644 index 0000000..d48a740 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/QuestionImageController.java @@ -0,0 +1,126 @@ +package ao.creativemode.kixi.controller; + +import ao.creativemode.kixi.dto.questionimage.QuestionImageRequest; +import ao.creativemode.kixi.dto.questionimage.QuestionImageResponse; +import ao.creativemode.kixi.service.QuestionImageService; +import jakarta.validation.Valid; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.http.codec.multipart.FilePart; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.util.UriComponentsBuilder; +import reactor.core.publisher.Mono; +import java.net.URI; +import java.util.List; + +import static org.springframework.http.HttpStatus.NO_CONTENT; + +@RestController +@RequestMapping("/api/v1/question-images") +public class 
QuestionImageController { + + private final QuestionImageService service; + + public QuestionImageController(QuestionImageService service) { + this.service = service; + } + + /** + * Retrieves all active question images. + */ + @GetMapping + public Mono>> listAllActive() { + return service.findAllActive() + .collectList() + .map(ResponseEntity::ok); + } + + /** + * Retrieves images associated with a specific question. + */ + @GetMapping("/question/{questionId}") + public Mono>> listByQuestion(@PathVariable Long questionId) { + return service.findByQuestionId(questionId) + .collectList() + .map(ResponseEntity::ok); + } + + /** + * Retrieves all soft-deleted images. + */ + @GetMapping("/trash") + public Mono>> listTrashed() { + return service.findAllDeleted() + .collectList() + .map(ResponseEntity::ok); + } + + /** + * Retrieves a single active image by ID. + */ + @GetMapping("/{id}") + public Mono> getById(@PathVariable Long id) { + return service.findByIdActive(id) + .map(ResponseEntity::ok); + } + + /** + * Creates a new question image entry by uploading a file. + * Consumes multipart/form-data to receive both metadata and the image file. + */ + @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE) + public Mono> create( + @RequestPart("data") @Valid QuestionImageRequest request, + @RequestPart("file") Mono filePartMono, + UriComponentsBuilder uriBuilder) { + + return service.createWithFile(request, filePartMono) + .map(created -> { + URI location = uriBuilder + .path("/api/v1/question-images/{id}") + .buildAndExpand(created.id()) + .toUri(); + + return ResponseEntity.created(location).body(created); + }); + } + + /** + * Updates an existing active image's metadata. + */ + @PutMapping("/{id}") + public Mono> update( + @PathVariable Long id, + @Valid @RequestBody QuestionImageRequest request) { + + return service.update(id, request) + .map(ResponseEntity::ok); + } + + /** + * Soft-deletes an image. 
+ */ + @DeleteMapping("/{id}") + public Mono> softDelete(@PathVariable Long id) { + return service.softDelete(id) + .thenReturn(ResponseEntity.status(NO_CONTENT).build()); + } + + /** + * Restores a soft-deleted image. + */ + @PostMapping("/{id}/restore") + public Mono> restore(@PathVariable Long id) { + return service.restore(id) + .thenReturn(ResponseEntity.ok().build()); + } + + /** + * Permanently deletes an image from the database and storage. + */ + @DeleteMapping("/{id}/purge") + public Mono> hardDelete(@PathVariable Long id) { + return service.hardDelete(id) + .thenReturn(ResponseEntity.status(NO_CONTENT).build()); + } +} \ No newline at end of file diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SimulationAnswerController.java b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SimulationAnswerController.java new file mode 100644 index 0000000..650b3fe --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SimulationAnswerController.java @@ -0,0 +1,112 @@ +package ao.creativemode.kixi.controller; + +import static org.springframework.http.HttpStatus.NO_CONTENT; + +import ao.creativemode.kixi.dto.simulationanswer.SimulationAnswerRequest; +import ao.creativemode.kixi.dto.simulationanswer.SimulationAnswerResponse; +import ao.creativemode.kixi.service.SimulationAnswerService; +import jakarta.validation.Valid; +import java.net.URI; +import java.util.List; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.util.UriComponentsBuilder; +import reactor.core.publisher.Mono; + +@RestController +@RequestMapping("/api/v1/simulation-answers") +public class SimulationAnswerController { + + private final SimulationAnswerService service; + + public SimulationAnswerController(SimulationAnswerService service) { + this.service = service; + } + + /** + * Retrieves all active (non-deleted) simulation answers. 
+ */ + @GetMapping + public Mono< + ResponseEntity> + > listAllActive() { + return service.findAllActive().collectList().map(ResponseEntity::ok); + } + + /** + * Retrieves all soft-deleted (trashed) simulation answers. + */ + @GetMapping("/trash") + public Mono>> listTrashed() { + return service.findAllDeleted().collectList().map(ResponseEntity::ok); + } + + /** + * Retrieves a single active simulation answer by ID. + */ + @GetMapping("/{id}") + public Mono> getById( + @PathVariable Long id + ) { + return service.findByIdActive(id).map(ResponseEntity::ok); + } + + /** + * Creates a new simulation answer. + */ + @PostMapping + public Mono> create( + @Valid @RequestBody SimulationAnswerRequest request, + UriComponentsBuilder uriBuilder + ) { + return service + .create(request) + .map(created -> { + URI location = uriBuilder + .path("/api/v1/simulation-answers/{id}") + .buildAndExpand(created.id()) + .toUri(); + + return ResponseEntity.created(location).body(created); + }); + } + + /** + * Updates an existing active simulation answer. + */ + @PutMapping("/{id}") + public Mono> update( + @PathVariable Long id, + @Valid @RequestBody SimulationAnswerRequest request + ) { + return service.update(id, request).map(ResponseEntity::ok); + } + + /** + * Soft-deletes a simulation answer (moves it to trash). + */ + @DeleteMapping("/{id}") + public Mono> softDelete(@PathVariable Long id) { + return service + .softDelete(id) + .thenReturn(ResponseEntity.status(NO_CONTENT).build()); + } + + /** + * Restores a soft-deleted simulation answer from trash. + */ + @PostMapping("/{id}/restore") + public Mono> restore(@PathVariable Long id) { + return service.restore(id).thenReturn(ResponseEntity.ok().build()); + } + + /** + * Permanently deletes a simulation answer (only if already soft-deleted). 
+ */ + @DeleteMapping("/{id}/purge") + public Mono> hardDelete(@PathVariable Long id) { + return service + .hardDelete(id) + .thenReturn(ResponseEntity.status(NO_CONTENT).build()); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SimulationController.java b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SimulationController.java new file mode 100644 index 0000000..7988ba1 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SimulationController.java @@ -0,0 +1,77 @@ +package ao.creativemode.kixi.controller; + +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; + +import ao.creativemode.kixi.dto.simulation.SimulationRequest; +import jakarta.validation.Valid; +import ao.creativemode.kixi.dto.simulation.SimulationResponse; +import ao.creativemode.kixi.service.SimulationService; +import reactor.core.publisher.Mono; + +import java.util.List; + +@RestController +@RequestMapping("/api/simulations") +public class SimulationController { + + private final SimulationService service; + + public SimulationController(SimulationService service) { + this.service = service; + } + + @GetMapping + public Mono>> findAll() { + return service.findAllActive() + .collectList() + .map(ResponseEntity::ok); + } + + @GetMapping("/trash") + public Mono>> findAllTrashed() { + return service.findAllTrashed() + .collectList() + .map(ResponseEntity::ok); + } + + @GetMapping("/{id}") + public Mono> findById(@PathVariable Long id) { + return service.findById(id) + .map(ResponseEntity::ok) + .defaultIfEmpty(ResponseEntity.notFound().build()); + } + + @PostMapping + public Mono> create(@Valid @RequestBody SimulationRequest dto) { + return service.create(dto) + .map(response -> ResponseEntity.status(HttpStatus.CREATED).body(response)); + } + + @PutMapping("/{id}") + public Mono> update( + @PathVariable Long id, + @Valid 
@RequestBody SimulationRequest dto) { + return service.update(id, dto) + .map(ResponseEntity::ok); + } + + @DeleteMapping("/{id}") + public Mono> softDelete(@PathVariable Long id) { + return service.softDelete(id) + .then(Mono.just(ResponseEntity.noContent().build())); + } + + @PutMapping("/{id}/restore") + public Mono> restore(@PathVariable Long id) { + return service.restore(id) + .then(Mono.just(ResponseEntity.noContent().build())); + } + + @DeleteMapping("/{id}/permanent") + public Mono> hardDelete(@PathVariable Long id) { + return service.hardDelete(id) + .then(Mono.just(ResponseEntity.noContent().build())); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/StatementController.java b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/StatementController.java index bfaba01..1302fb4 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/StatementController.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/StatementController.java @@ -1,77 +1,594 @@ package ao.creativemode.kixi.controller; -import ao.creativemode.kixi.dto.schoolyears.StatementRequest; -import ao.creativemode.kixi.dto.schoolyears.StatementResponse; +import ao.creativemode.kixi.common.exception.ApiException; +import ao.creativemode.kixi.model.Question; +import ao.creativemode.kixi.model.QuestionOption; +import ao.creativemode.kixi.model.Statement; import ao.creativemode.kixi.service.StatementService; -import jakarta.validation.Valid; +import ao.creativemode.kixi.service.StatementService.StatementWithQuestions; +import java.net.URI; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; +import org.springframework.http.codec.multipart.FilePart; import 
org.springframework.web.bind.annotation.*; +import org.springframework.web.util.UriComponentsBuilder; +import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; -import java.util.List; - - +/** + * REST Controller for Statement (exam paper) management. + * + * Provides endpoints for: + * - CRUD operations on statements + * - OCR-based statement creation from images + * - Managing statement visibility and review status + * - Retrieving statements with their questions and options + * + * Base path: /api/v1/statements + */ @RestController -@RequestMapping("/statements") +@RequestMapping("/api/v1/statements") public class StatementController { - private final StatementService service; + private static final Logger log = LoggerFactory.getLogger( + StatementController.class + ); + + private static final Set ALLOWED_EXTENSIONS = Set.of( + ".jpg", + ".jpeg", + ".png", + ".pdf", + ".webp", + ".bmp", + ".tiff", + ".tif" + ); + + private static final int MAX_FILES = 10; + + private final StatementService statementService; - public StatementController(StatementService service) { - this.service = service; + public StatementController(StatementService statementService) { + this.statementService = statementService; } + // ========================================================================= + // OCR Endpoints + // ========================================================================= + + /** + * Create a statement from uploaded images using OCR. + * + * This endpoint receives image files, sends them to the OCR service, + * and creates a Statement with Questions based on the extracted data. 
+ * + * @param files Uploaded image files (multipart/form-data) + * @param uriBuilder URI builder for location header + * @return Created statement with questions and options + */ + @PostMapping( + value = "/ocr/extract", + consumes = MediaType.MULTIPART_FORM_DATA_VALUE + ) + public Mono> createFromOcr( + @RequestPart("files") Flux files, + UriComponentsBuilder uriBuilder + ) { + log.info("OCR statement creation request received"); + + return files + .collectList() + .flatMap(fileList -> { + // Validate file count + if (fileList.isEmpty()) { + return Mono.error( + ApiException.badRequest("At least one file is required") + ); + } + if (fileList.size() > MAX_FILES) { + return Mono.error( + ApiException.badRequest( + "Maximum " + + MAX_FILES + + " files allowed per request" + ) + ); + } + + // Validate file types + for (FilePart file : fileList) { + if (!isAllowedFileType(file.filename())) { + return Mono.error( + ApiException.badRequest( + "Invalid file type: " + + file.filename() + + ". Allowed: " + + String.join(", ", ALLOWED_EXTENSIONS) + ) + ); + } + } + + log.info( + "Processing {} file(s) for OCR-based statement creation", + fileList.size() + ); + + // TODO: Get actual user ID from authentication context + Long createdBy = 1L; // Placeholder + + return statementService.createFromOcr(fileList, createdBy); + }) + .map(result -> { + URI location = uriBuilder + .path("/api/v1/statements/{id}") + .buildAndExpand(result.statement().getId()) + .toUri(); + + StatementOcrResponse response = StatementOcrResponse.from( + result + ); + return ResponseEntity.created(location).body(response); + }) + .doOnSuccess(response -> + log.info( + "Statement created from OCR: id={}", + response.getBody() != null ? response.getBody().id() : null + ) + ) + .doOnError(error -> + log.error("OCR statement creation failed", error) + ); + } + + /** + * Create a statement from a single uploaded image using OCR. + * + * Simplified endpoint for single-file uploads. 
+ * + * @param file Single uploaded image file + * @param uriBuilder URI builder for location header + * @return Created statement with questions and options + */ + @PostMapping( + value = "/ocr/extract/single", + consumes = MediaType.MULTIPART_FORM_DATA_VALUE + ) + public Mono> createFromOcrSingle( + @RequestPart("file") FilePart file, + UriComponentsBuilder uriBuilder + ) { + log.info( + "Single-file OCR statement creation request received: {}", + file.filename() + ); + + // Validate file type + if (!isAllowedFileType(file.filename())) { + return Mono.error( + ApiException.badRequest( + "Invalid file type: " + + file.filename() + + ". Allowed: " + + String.join(", ", ALLOWED_EXTENSIONS) + ) + ); + } + + // TODO: Get actual user ID from authentication context + Long createdBy = 1L; // Placeholder + + return statementService + .createFromOcr(List.of(file), createdBy) + .map(result -> { + URI location = uriBuilder + .path("/api/v1/statements/{id}") + .buildAndExpand(result.statement().getId()) + .toUri(); + + StatementOcrResponse response = StatementOcrResponse.from( + result + ); + + return ResponseEntity.created(location).body(response); + }) + .doOnSuccess(response -> + log.info( + "Statement created from single-file OCR: id={}", + response.getBody() != null ? response.getBody().id() : null + ) + ) + .doOnError(error -> + log.error("Single-file OCR statement creation failed", error) + ); + } + + // ========================================================================= + // Standard CRUD Endpoints + // ========================================================================= + + /** + * Get all active statements. 
+ */ @GetMapping - public Mono>> listAllActive() { - return service.listAllActive() - .map(ResponseEntity::ok); + public Mono>> listAllActive() { + return statementService + .findAllActive() + .map(StatementSummary::from) + .collectList() + .map(ResponseEntity::ok); } - @GetMapping("/trashed") - public Mono>> listTrashed() { - return service.listTrashed() - .map(ResponseEntity::ok); + /** + * Get all statements that need review. + */ + @GetMapping("/review") + public Mono>> listNeedingReview() { + return statementService + .findNeedingReview() + .map(StatementSummary::from) + .collectList() + .map(ResponseEntity::ok); } - + /** + * Get all statements created via OCR. + */ + @GetMapping("/from-ocr") + public Mono>> listFromOcr() { + return statementService + .findFromOcr() + .map(StatementSummary::from) + .collectList() + .map(ResponseEntity::ok); + } + + /** + * Get all deleted (trashed) statements. + */ + @GetMapping("/trash") + public Mono>> listTrashed() { + return statementService + .findAllDeleted() + .map(StatementSummary::from) + .collectList() + .map(ResponseEntity::ok); + } + + /** + * Get a statement by ID. + */ @GetMapping("/{id}") - public Mono> getById(@PathVariable Long id) { - return service.getById(id) - .map(ResponseEntity::ok); + public Mono> getById( + @PathVariable Long id + ) { + return statementService + .findById(id) + .map(StatementSummary::from) + .map(ResponseEntity::ok); + } + + /** + * Get a statement with all its questions and options. + */ + @GetMapping("/{id}/full") + public Mono> getByIdWithQuestions( + @PathVariable Long id + ) { + return statementService + .findByIdWithQuestions(id) + .map(StatementOcrResponse::from) + .map(ResponseEntity::ok); } + /** + * Search statements by title. 
+ */ + @GetMapping("/search") + public Mono>> searchByTitle( + @RequestParam String query + ) { + return statementService + .searchByTitle(query) + .map(StatementSummary::from) + .collectList() + .map(ResponseEntity::ok); + } - @PostMapping - public Mono> create(@Valid @RequestBody StatementRequest request) { - return service.create(request) - .map(response -> ResponseEntity.status(HttpStatus.CREATED).body(response)); + /** + * Get statements by school year. + */ + @GetMapping("/by-school-year/{schoolYearId}") + public Mono>> getBySchoolYear( + @PathVariable Long schoolYearId + ) { + return statementService + .findBySchoolYear(schoolYearId) + .map(StatementSummary::from) + .collectList() + .map(ResponseEntity::ok); } - @PutMapping("/{id}") - public Mono> update( - @PathVariable Long id, - @Valid @RequestBody StatementRequest request) { - return service.update(id, request) - .map(ResponseEntity::ok); + /** + * Get statements by subject. + */ + @GetMapping("/by-subject/{subjectId}") + public Mono>> getBySubject( + @PathVariable Long subjectId + ) { + return statementService + .findBySubject(subjectId) + .map(StatementSummary::from) + .collectList() + .map(ResponseEntity::ok); } + /** + * Soft delete a statement. + */ @DeleteMapping("/{id}") public Mono> softDelete(@PathVariable Long id) { - return service.softDelete(id) - .thenReturn(ResponseEntity.noContent().build()); + return statementService + .softDelete(id) + .thenReturn(ResponseEntity.noContent().build()); } - @PatchMapping("/{id}/restore") + /** + * Restore a soft-deleted statement. + */ + @PostMapping("/{id}/restore") public Mono> restore(@PathVariable Long id) { - return service.restore(id) - .thenReturn(ResponseEntity.noContent().build()); + return statementService + .restore(id) + .thenReturn(ResponseEntity.ok().build()); } - @DeleteMapping("/{id}/hard") + /** + * Permanently delete a statement (only if already soft-deleted). 
+ */ + @DeleteMapping("/{id}/purge") public Mono> hardDelete(@PathVariable Long id) { - return service.hardDelete(id) - .thenReturn(ResponseEntity.noContent().build()); + return statementService + .hardDelete(id) + .thenReturn(ResponseEntity.noContent().build()); + } + + /** + * Approve review and make statement visible. + */ + @PostMapping("/{id}/approve") + public Mono> approveReview( + @PathVariable Long id + ) { + return statementService + .approveReview(id) + .map(StatementSummary::from) + .map(ResponseEntity::ok); + } + + /** + * Set statement visibility. + */ + @PatchMapping("/{id}/visibility") + public Mono> setVisibility( + @PathVariable Long id, + @RequestParam boolean visible + ) { + return statementService + .setVisible(id, visible) + .map(StatementSummary::from) + .map(ResponseEntity::ok); + } + + // ========================================================================= + // Statistics Endpoints + // ========================================================================= + + /** + * Get statement statistics. + */ + @GetMapping("/stats") + public Mono>> getStatistics() { + return Mono.zip( + statementService.countActive(), + statementService.countNeedingReview(), + statementService.countBySource("ocr"), + statementService.countBySource("manual") + ) + .map(tuple -> { + Map stats = new HashMap<>(); + stats.put("totalActive", tuple.getT1()); + stats.put("needingReview", tuple.getT2()); + stats.put("fromOcr", tuple.getT3()); + stats.put("manual", tuple.getT4()); + return stats; + }) + .map(ResponseEntity::ok); + } + + // ========================================================================= + // Helper Methods + // ========================================================================= + + /** + * Validate file extension. 
+ */ + private boolean isAllowedFileType(String filename) { + if (filename == null || filename.isBlank()) { + return false; + } + + String lowerFilename = filename.toLowerCase(); + return ALLOWED_EXTENSIONS.stream().anyMatch(lowerFilename::endsWith); + } + + // ========================================================================= + // Response DTOs + // ========================================================================= + + /** + * Summary response for statement listing. + */ + public record StatementSummary( + Long id, + String title, + String examType, + Integer durationMinutes, + String variant, + Double totalMaxScore, + Boolean visible, + Boolean needsReview, + String source, + Double ocrConfidence, + Long schoolYearId, + Long termId, + Long subjectId, + Long classId + ) { + public static StatementSummary from(Statement statement) { + return new StatementSummary( + statement.getId(), + statement.getTitle(), + statement.getExamType(), + statement.getDurationMinutes(), + statement.getVariant(), + statement.getTotalMaxScore(), + statement.getVisible(), + statement.getNeedsReview(), + statement.getSource(), + statement.getOcrConfidence(), + statement.getSchoolYearId(), + statement.getTermId(), + statement.getSubjectId(), + statement.getClassId() + ); + } + } + + /** + * Full response including questions and options. 
+ */ + public record StatementOcrResponse( + Long id, + String title, + String examType, + Integer durationMinutes, + String variant, + String instructions, + Double totalMaxScore, + Boolean visible, + Boolean needsReview, + String source, + Double ocrConfidence, + String ocrRequestId, + Long schoolYearId, + Long termId, + Long subjectId, + Long classId, + List questions + ) { + public static StatementOcrResponse from(StatementWithQuestions result) { + Statement s = result.statement(); + List questions = result.questions(); + List allOptions = result.options(); + + List questionResponses = questions + .stream() + .map(q -> { + List options = allOptions + .stream() + .filter(opt -> opt.getQuestionId().equals(q.getId())) + .map(OptionResponse::from) + .toList(); + return QuestionResponse.from(q, options); + }) + .toList(); + + return new StatementOcrResponse( + s.getId(), + s.getTitle(), + s.getExamType(), + s.getDurationMinutes(), + s.getVariant(), + s.getInstructions(), + s.getTotalMaxScore(), + s.getVisible(), + s.getNeedsReview(), + s.getSource(), + s.getOcrConfidence(), + s.getOcrRequestId(), + s.getSchoolYearId(), + s.getTermId(), + s.getSubjectId(), + s.getClassId(), + questionResponses + ); + } + } + + /** + * Question response DTO. + */ + public record QuestionResponse( + Long id, + Integer number, + String text, + String questionType, + Double maxScore, + Integer orderIndex, + Double ocrConfidence, + Integer pageIndex, + Boolean needsReview, + List options + ) { + public static QuestionResponse from( + Question q, + List options + ) { + return new QuestionResponse( + q.getId(), + q.getNumber(), + q.getText(), + q.getQuestionType(), + q.getMaxScore(), + q.getOrderIndex(), + q.getOcrConfidence(), + q.getPageIndex(), + q.getNeedsReview(), + options + ); + } + } + + /** + * Option response DTO. 
+ */ + public record OptionResponse( + Long id, + String optionLabel, + String optionText, + Boolean isCorrect, + Integer orderIndex, + Double ocrConfidence + ) { + public static OptionResponse from(QuestionOption o) { + return new OptionResponse( + o.getId(), + o.getOptionLabel(), + o.getOptionText(), + o.getIsCorrect(), + o.getOrderIndex(), + o.getOcrConfidence() + ); + } } -} \ No newline at end of file +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SubjectController.java b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SubjectController.java index 3f979eb..d72ee20 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SubjectController.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/controller/SubjectController.java @@ -29,9 +29,9 @@ public Mono>> listAllActive(){ .map(ResponseEntity::ok); } - @GetMapping("/{id}") - public Mono> getByCode(@PathVariable Long id){ - return service.findByCodeActive(id) + @GetMapping("/{code}") + public Mono> getByCode(@PathVariable String code){ + return service.findByCodeActive(code) .map(ResponseEntity::ok); } @@ -42,6 +42,9 @@ public Mono>> listTrashed(){ .map(ResponseEntity::ok); } + + + @PostMapping public Mono> create( @Valid @RequestBody SubjectRequest request, @@ -50,38 +53,38 @@ public Mono> create( return service.create(request) .map(subject->{ URI uriLocal = uriBuilder - .path("/api/v1/subjects/{id}") - .buildAndExpand(subject.id()) + .path("/api/v1/subjects/{code}") + .buildAndExpand(subject.code()) .toUri(); return ResponseEntity.created(uriLocal).body(subject); }); } - @PutMapping("/{id}") + @PutMapping("/{code}") public Mono> update( - @PathVariable Long id, + @PathVariable String code, @Valid @RequestBody SubjectRequest data ){ - return service.update(id,data) + return service.update(code,data) .map(ResponseEntity::ok); } - @DeleteMapping("/{id}") - public Mono> softDelete(@PathVariable Long id){ - return 
service.softDelete(id) + @DeleteMapping("/{code}") + public Mono> softDelete(@PathVariable String code){ + return service.softDelete(code) .map(v->ResponseEntity.noContent().build()); } - @PostMapping("/{id}/restore") - public Mono> restore(@PathVariable Long id){ - return service.restore(id) + @PostMapping("/{code}/restore") + public Mono> restore(@PathVariable String code){ + return service.restore(code) .map(v->ResponseEntity.noContent().build()); } - @DeleteMapping("/{id}/purge") - public Mono> hardDelete(@PathVariable Long id){ - return service.hardDelete(id) + @DeleteMapping("/{code}/purge") + public Mono> hardDelete(@PathVariable String code){ + return service.hardDelete(code) .map(v->ResponseEntity.noContent().build()); } } diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatMessageDto.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatMessageDto.java new file mode 100644 index 0000000..6201dab --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatMessageDto.java @@ -0,0 +1,15 @@ +package ao.creativemode.kixi.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class ChatMessageDto { + private String role; + private String content; +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatRequestDto.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatRequestDto.java new file mode 100644 index 0000000..f781507 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatRequestDto.java @@ -0,0 +1,17 @@ +package ao.creativemode.kixi.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.util.List; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class ChatRequestDto { + private String 
message; + private List history; +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatResponseDto.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatResponseDto.java new file mode 100644 index 0000000..c492c2b --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ChatResponseDto.java @@ -0,0 +1,16 @@ +package ao.creativemode.kixi.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class ChatResponseDto { + private String message; + private String model; + private Integer tokensUsed; +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/auth/LoginRequest.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/auth/LoginRequest.java new file mode 100644 index 0000000..c3232ce --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/auth/LoginRequest.java @@ -0,0 +1,11 @@ +package ao.creativemode.kixi.dto.auth; + +import jakarta.validation.constraints.NotBlank; + +public record LoginRequest( + @NotBlank(message = "Username or email is required") + String usernameOrEmail, + + @NotBlank(message = "Password is required") + String password +) {} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/auth/LoginResponse.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/auth/LoginResponse.java new file mode 100644 index 0000000..4692b9d --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/auth/LoginResponse.java @@ -0,0 +1,14 @@ +package ao.creativemode.kixi.dto.auth; + +import java.time.Instant; +import java.util.List; + +public record LoginResponse( + String accessToken, + String tokenType, + Instant expiresAt, + Long accountId, + List roles +) { + public static final String TOKEN_TYPE = "Bearer"; +} diff --git 
a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/classe/ClassRequest.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/classe/ClassRequest.java index e70dd23..b731de8 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/classe/ClassRequest.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/classe/ClassRequest.java @@ -1,20 +1,10 @@ -package ao.creativemode.kixi.dto.classe; - - -import jakarta.validation.constraints.NotBlank; -import jakarta.validation.constraints.NotNull; - - -public record ClassRequest( - @NotBlank(message = "code required") - @NotNull(message = "Code cannot be null") - String code, - @NotBlank(message = "Grade is required") - @NotNull(message = "Grade cannot be null") - String grade, - @NotNull(message="course id cannot be null") - Long courseId, - @NotNull(message="school year id cannot be null") - Long schoolYearId -) { -} +package ao.creativemode.kixi.dto.classe; + +import jakarta.validation.constraints.NotNull; + +public record ClassRequest( + @NotNull(message = "Code cannot be null") String code, + @NotNull(message = "Grade is required") Integer grade, + @NotNull(message = "course id cannot be null") Long courseId, + @NotNull(message = "school year id cannot be null") Long schoolYearId +) {} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/classe/ClassResponse.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/classe/ClassResponse.java index 555528f..6f50cb1 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/classe/ClassResponse.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/classe/ClassResponse.java @@ -1,17 +1,16 @@ -package ao.creativemode.kixi.dto.classe; - -import ao.creativemode.kixi.model.Course; -import ao.creativemode.kixi.model.SchoolYear; - -import java.time.LocalDateTime; - -public record ClassResponse( - Long id, - String code, - String grade, - Course course, - SchoolYear 
schoolYear, - LocalDateTime createdAt, - LocalDateTime updatedAt, - LocalDateTime deletedAt -) { } +package ao.creativemode.kixi.dto.classe; + +import ao.creativemode.kixi.model.Course; +import ao.creativemode.kixi.model.SchoolYear; +import java.time.LocalDateTime; + +public record ClassResponse( + Long id, + String code, + Integer grade, + Course course, + SchoolYear schoolYear, + LocalDateTime createdAt, + LocalDateTime updatedAt, + LocalDateTime deletedAt +) {} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ocr/ExamExtractionResponse.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ocr/ExamExtractionResponse.java new file mode 100644 index 0000000..906f247 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ocr/ExamExtractionResponse.java @@ -0,0 +1,279 @@ +package ao.creativemode.kixi.dto.ocr; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.List; + +/** + * Response DTO for structured exam extraction from OCR. + * + * Represents the complete extracted exam data in Angolan format, + * matching the exact structure required for persistence. 
+ */ +public record ExamExtractionResponse( + @JsonProperty("exam_type") String examType, + + @JsonProperty("duration_minutes") Integer durationMinutes, + + String variant, + + String title, + + String instructions, + + @JsonProperty("school_year_start") Integer schoolYearStart, + + @JsonProperty("school_year_end") Integer schoolYearEnd, + + @JsonProperty("class_grade") String classGrade, + + @JsonProperty("course_name") String courseName, + + @JsonProperty("subject_name") String subjectName, + + @JsonProperty("total_max_score") Double totalMaxScore, + + List questions, + + @JsonProperty("images_to_upload") List imagesToUpload, + + // Processing metadata + @JsonProperty("request_id") String requestId, + + @JsonProperty("processing_time_ms") Integer processingTimeMs, + + @JsonProperty("overall_confidence") Double overallConfidence, + + @JsonProperty("needs_review") Boolean needsReview, + + List warnings +) { + /** + * Create from OcrResponse. + */ + public static ExamExtractionResponse fromOcrResponse(OcrResponse response) { + if (response == null) { + return null; + } + + OcrResponse.OcrMetadata metadata = response.metadata(); + + return new ExamExtractionResponse( + metadata != null && metadata.examType() != null + ? metadata.examType().value() : null, + metadata != null && metadata.durationMinutes() != null + ? metadata.durationMinutes().value() : null, + metadata != null && metadata.variant() != null + ? metadata.variant().value() : null, + metadata != null && metadata.title() != null + ? metadata.title().value() : null, + metadata != null && metadata.instructions() != null + ? metadata.instructions().value() : null, + metadata != null ? metadata.getSchoolYearStartValue() : null, + metadata != null ? metadata.getSchoolYearEndValue() : null, + metadata != null ? metadata.getClassGradeValue() : null, + metadata != null ? metadata.getCourseNameValue() : null, + metadata != null ? metadata.getSubjectNameValue() : null, + metadata != null ? 
metadata.getTotalMaxScoreValue() : null, + response.questions() != null + ? response.questions().stream() + .map(QuestionData::fromExtractedQuestion) + .toList() + : List.of(), + response.imagesToUpload() != null + ? response.imagesToUpload().stream() + .map(ImageToUploadData::fromImageToUpload) + .toList() + : List.of(), + response.requestId(), + response.processingTimeMs(), + response.overallConfidence(), + response.needsReview(), + response.warnings() != null + ? response.warnings().stream() + .map(WarningData::fromOcrWarning) + .toList() + : List.of() + ); + } + + /** + * Check if extraction was successful. + */ + public boolean isValid() { + return subjectName != null && !subjectName.isBlank(); + } + + /** + * Question data for the response. + */ + public record QuestionData( + String number, + + List subitems, + + String text, + + String type, + + Double cotacao, + + List options, + + @JsonProperty("has_image") Boolean hasImage, + + @JsonProperty("image_description") String imageDescription, + + Double confidence + ) { + /** + * Create from ExtractedQuestion. + */ + public static QuestionData fromExtractedQuestion(OcrResponse.ExtractedQuestion q) { + if (q == null) return null; + + return new QuestionData( + q.number(), + q.subitems() != null ? q.subitems() : List.of(), + q.getTextValue(), + q.getTypeValue(), + q.getCotacaoValue(), + q.options() != null + ? q.options().stream() + .map(OptionData::fromExtractedOption) + .toList() + : null, + q.hasVisualContent(), + q.imageDescription(), + q.confidence() + ); + } + + /** + * Check if this is a multiple choice question. + */ + public boolean isMultipleChoice() { + return "multipla_escolha".equals(type) || + (options != null && !options.isEmpty()); + } + + /** + * Check if this is a dissertative question. + */ + public boolean isDissertativa() { + return "dissertativa".equals(type); + } + } + + /** + * Option data for multiple choice questions. 
+ */ + public record OptionData( + @JsonProperty("option_label") String optionLabel, + + @JsonProperty("option_text") String optionText, + + Double confidence + ) { + /** + * Create from ExtractedOption. + */ + public static OptionData fromExtractedOption(OcrResponse.ExtractedOption opt) { + if (opt == null) return null; + + return new OptionData( + opt.optionLabel(), + opt.optionText(), + opt.confidence() + ); + } + } + + /** + * Image to upload data. + */ + public record ImageToUploadData( + @JsonProperty("suggested_filename") String suggestedFilename, + + String description, + + String region + ) { + /** + * Create from ImageToUpload. + */ + public static ImageToUploadData fromImageToUpload(OcrResponse.ImageToUpload img) { + if (img == null) return null; + + return new ImageToUploadData( + img.suggestedFilename(), + img.description(), + img.region() + ); + } + + /** + * Check if this is a header image. + */ + public boolean isHeader() { + return "cabecalho".equals(region); + } + + /** + * Check if this is a footer/coordination signature. + */ + public boolean isFooter() { + return "rodape".equals(region); + } + + /** + * Check if this is a question image. + */ + public boolean isQuestionImage() { + return region != null && region.startsWith("questao_"); + } + + /** + * Get the question number if this is a question image. + */ + public String getQuestionNumber() { + if (isQuestionImage() && region.length() > 8) { + return region.substring(8); + } + return null; + } + } + + /** + * Warning data. + */ + public record WarningData( + String code, + + String field, + + Double confidence, + + String message + ) { + /** + * Create from OcrWarning. + */ + public static WarningData fromOcrWarning(OcrResponse.OcrWarning w) { + if (w == null) return null; + + return new WarningData( + w.code(), + w.field(), + w.confidence(), + w.message() + ); + } + + /** + * Check if this is a low confidence warning. 
+ */ + public boolean isLowConfidence() { + return "LOW_CONFIDENCE".equals(code); + } + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ocr/OcrResponse.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ocr/OcrResponse.java new file mode 100644 index 0000000..db79497 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/ocr/OcrResponse.java @@ -0,0 +1,345 @@ +package ao.creativemode.kixi.dto.ocr; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.util.List; + +/** + * OCR Service Response DTO + * + * Maps the JSON response from the OCR microservice to Java objects. + * Optimized for Angolan exam papers (12ª classe). + */ +public record OcrResponse( + String status, + + @JsonProperty("requestId") String requestId, + + @JsonProperty("processingTimeMs") Integer processingTimeMs, + + @JsonProperty("overallConfidence") Double overallConfidence, + + DocumentInfo document, + + OcrMetadata metadata, + + List questions, + + @JsonProperty("imagesToUpload") List imagesToUpload, + + @JsonProperty("unmappedContent") List unmappedContent, + + List warnings, + + @JsonProperty("errorMessage") String errorMessage +) { + /** + * Check if the OCR processing was successful. + */ + public boolean isSuccess() { + return "success".equals(status); + } + + /** + * Check if the OCR processing was partial (some data extracted with low confidence). + */ + public boolean isPartial() { + return "partial".equals(status); + } + + /** + * Check if the OCR processing failed. + */ + public boolean isError() { + return "error".equals(status); + } + + /** + * Check if the result needs human review (low confidence or warnings). + */ + public boolean needsReview() { + return ( + isPartial() || + (overallConfidence != null && overallConfidence < 0.8) || + (warnings != null && !warnings.isEmpty()) + ); + } + + /** + * Document information from OCR. 
+ */ + public record DocumentInfo( + @JsonProperty("pageCount") Integer pageCount, + @JsonProperty("mainLanguage") String mainLanguage, + @JsonProperty("hasTables") Boolean hasTables + ) {} + + /** + * Extracted metadata with confidence scores - Angolan exam format. + */ + public record OcrMetadata( + // New structured fields + @JsonProperty("examType") ConfidenceField examType, + + @JsonProperty("durationMinutes") + ConfidenceField durationMinutes, + + @JsonProperty("variant") ConfidenceField variant, + + @JsonProperty("title") ConfidenceField title, + + @JsonProperty("instructions") ConfidenceField instructions, + + @JsonProperty("schoolYearStart") + ConfidenceField schoolYearStart, + + @JsonProperty("schoolYearEnd") ConfidenceField schoolYearEnd, + + @JsonProperty("classGrade") ConfidenceField classGrade, + + @JsonProperty("courseName") ConfidenceField courseName, + + @JsonProperty("subjectName") ConfidenceField subjectName, + + @JsonProperty("totalMaxScore") ConfidenceField totalMaxScore, + + // Legacy fields for compatibility + @JsonProperty("schoolYear") ConfidenceField schoolYear, + + @JsonProperty("term") ConfidenceField term, + + @JsonProperty("subject") ConfidenceField subject, + + @JsonProperty("course") ConfidenceField course, + + @JsonProperty("class") ConfidenceField classInfo + ) { + /** + * Get the school year start value, returning null if not present. + */ + public Integer getSchoolYearStartValue() { + return schoolYearStart != null ? schoolYearStart.value() : null; + } + + /** + * Get the school year end value, returning null if not present. + */ + public Integer getSchoolYearEndValue() { + return schoolYearEnd != null ? schoolYearEnd.value() : null; + } + + /** + * Get the class grade value, returning null if not present. + */ + public String getClassGradeValue() { + return classGrade != null ? classGrade.value() : null; + } + + /** + * Get the course name value, returning null if not present. 
+ */ + public String getCourseNameValue() { + return courseName != null ? courseName.value() : null; + } + + /** + * Get the subject name value, returning null if not present. + */ + public String getSubjectNameValue() { + return subjectName != null ? subjectName.value() : null; + } + + /** + * Get the total max score value, returning null if not present. + */ + public Double getTotalMaxScoreValue() { + return totalMaxScore != null ? totalMaxScore.value() : null; + } + } + + /** + * Generic confidence field for any value type. + */ + public record ConfidenceField(T value, Double confidence) { + /** + * Check if the field has a value with sufficient confidence. + */ + public boolean isConfident(double threshold) { + return ( + value != null && confidence != null && confidence >= threshold + ); + } + + /** + * Check if the field has low confidence (needs review). + */ + public boolean isLowConfidence(double threshold) { + return ( + value != null && confidence != null && confidence < threshold + ); + } + } + + /** + * Extracted question with all components - Angolan exam format. + */ + public record ExtractedQuestion( + @JsonProperty("number") String number, + + Double confidence, + + @JsonProperty("subitems") List subitems, + + @JsonProperty("subitemsContent") List subitemsContent, + + ConfidenceField text, + + @JsonProperty("type") String type, + + @JsonProperty("cotacao") Double cotacao, + + List options, + + @JsonProperty("hasImage") Boolean hasImage, + + @JsonProperty("imageDescription") String imageDescription, + + @JsonProperty("pageIndex") Integer pageIndex, + + @JsonProperty("startY") Integer startY, + + @JsonProperty("endY") Integer endY + ) { + /** + * Check if this is a multiple choice question. + */ + public boolean isMultipleChoice() { + return options != null && !options.isEmpty(); + } + + /** + * Check if this is a dissertativa (essay/development) question. 
+ */ + public boolean isDissertativa() { + return "dissertativa".equals(type); + } + + /** + * Get the question type value. + */ + public String getTypeValue() { + return type != null ? type : "unknown"; + } + + /** + * Get the text value, returning empty string if not present. + */ + public String getTextValue() { + return text != null && text.value() != null ? text.value() : ""; + } + + /** + * Get the cotação (score) as a Double, returning null if not present. + */ + public Double getCotacaoValue() { + return cotacao; + } + + /** + * Check if the question has visual content. + */ + public boolean hasVisualContent() { + return hasImage != null && hasImage; + } + } + + /** + * Subitem content with label, text and optional cotação. + */ + public record SubitemContent(String label, String text, Double cotacao) {} + + /** + * Extracted question option. + */ + public record ExtractedOption( + @JsonProperty("optionLabel") String optionLabel, + @JsonProperty("optionText") String optionText, + Double confidence + ) {} + + /** + * Image region to be uploaded. + */ + public record ImageToUpload( + @JsonProperty("suggestedFilename") String suggestedFilename, + + String description, + + String region, + + @JsonProperty("pageIndex") Integer pageIndex + ) { + /** + * Check if this is a header/logo image. + */ + public boolean isHeader() { + return "cabecalho".equals(region); + } + + /** + * Check if this is a footer/coordination signature image. + */ + public boolean isFooter() { + return "rodape".equals(region); + } + + /** + * Check if this is a question-related image. + */ + public boolean isQuestionImage() { + return region != null && region.startsWith("questao_"); + } + + /** + * Get the question number if this is a question image. + */ + public String getQuestionNumber() { + if (isQuestionImage() && region.length() > 8) { + return region.substring(8); + } + return null; + } + } + + /** + * Unmapped content that couldn't be categorized. 
+ */ + public record UnmappedContent( + @JsonProperty("pageIndex") Integer pageIndex, + String text, + Double confidence + ) {} + + /** + * Processing warning from OCR service. + */ + public record OcrWarning( + String code, + String field, + Double confidence, + String message + ) { + /** + * Check if this is a low confidence warning. + */ + public boolean isLowConfidence() { + return "LOW_CONFIDENCE".equals(code); + } + + /** + * Check if this is an unknown question type warning. + */ + public boolean isUnknownQuestionType() { + return "UNKNOWN_QUESTION_TYPE".equals(code); + } + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/questionimage/QuestionImageRequest.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/questionimage/QuestionImageRequest.java new file mode 100644 index 0000000..db0bd93 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/questionimage/QuestionImageRequest.java @@ -0,0 +1,16 @@ +package ao.creativemode.kixi.dto.questionimage; + +import jakarta.validation.constraints.NotNull; + +/** + * DTO for creating or updating QuestionImage metadata. + * Note: imageUrl is excluded from the request as it is generated by the server during upload. 
+ */ +public record QuestionImageRequest( + @NotNull(message = "Question ID is required") + Long questionId, + + String caption, + + Integer orderIndex +) {} \ No newline at end of file diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/questionimage/QuestionImageResponse.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/questionimage/QuestionImageResponse.java new file mode 100644 index 0000000..527f5e0 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/questionimage/QuestionImageResponse.java @@ -0,0 +1,14 @@ +package ao.creativemode.kixi.dto.questionimage; + +import java.time.LocalDateTime; + +public record QuestionImageResponse( + Long id, + Long questionId, + String imageUrl, + String caption, + Integer orderIndex, + LocalDateTime createdAt, + LocalDateTime updatedAt, + LocalDateTime deletedAt +) {} \ No newline at end of file diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/schoolyears/StatementRequest.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/schoolyears/StatementRequest.java deleted file mode 100644 index 28dcf1d..0000000 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/schoolyears/StatementRequest.java +++ /dev/null @@ -1,81 +0,0 @@ -package ao.creativemode.kixi.dto.schoolyears; - -import jakarta.validation.constraints.*; - -public class StatementRequest { - - @NotBlank(message = "O título é obrigatório") - @Size(min = 3, max = 255, message = "O título deve ter entre 3 e 255 caracteres") - private String title; - - @NotBlank(message = "O tipo de exame é obrigatório") - private String examType; - - @Positive(message = "A duração deve ser maior que zero") - private Integer durationMinutes; - - @Size(max = 50, message = "A variante deve ter no máximo 50 caracteres") - private String variant; - - @Size(max = 5000, message = "As instruções devem ter no máximo 5000 caracteres") - private String instructions; - - 
@PositiveOrZero(message = "A pontuação máxima não pode ser negativa") - private Integer totalMaxScore; - - @NotNull(message = "O ano letivo é obrigatório") - private Long schoolYearId; - - @NotNull(message = "O trimestre é obrigatório") - private Long termId; - - @NotNull(message = "A disciplina é obrigatória") - private Long subjectId; - - @NotNull(message = "A turma é obrigatória") - private Long classId; - - private Long courseId; - - private Boolean visible; - - public StatementRequest() {} - - // Getters e Setters - - public String getTitle() { return title; } - public void setTitle(String title) { this.title = title; } - - public String getExamType() { return examType; } - public void setExamType(String examType) { this.examType = examType; } - - public Integer getDurationMinutes() { return durationMinutes; } - public void setDurationMinutes(Integer durationMinutes) { this.durationMinutes = durationMinutes; } - - public String getVariant() { return variant; } - public void setVariant(String variant) { this.variant = variant; } - - public String getInstructions() { return instructions; } - public void setInstructions(String instructions) { this.instructions = instructions; } - - public Integer getTotalMaxScore() { return totalMaxScore; } - public void setTotalMaxScore(Integer totalMaxScore) { this.totalMaxScore = totalMaxScore; } - - public Long getSchoolYearId() { return schoolYearId; } - public void setSchoolYearId(Long schoolYearId) { this.schoolYearId = schoolYearId; } - - public Long getTermId() { return termId; } - public void setTermId(Long termId) { this.termId = termId; } - - public Long getSubjectId() { return subjectId; } - public void setSubjectId(Long subjectId) { this.subjectId = subjectId; } - - public Long getClassId() { return classId; } - public void setClassId(Long classId) { this.classId = classId; } - - public Long getCourseId() { return courseId; } - public void setCourseId(Long courseId) { this.courseId = courseId; } - - public Boolean 
getVisible() { return visible; } - public void setVisible(Boolean visible) { this.visible = visible; } -} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/schoolyears/StatementResponse.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/schoolyears/StatementResponse.java deleted file mode 100644 index af3fa0e..0000000 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/schoolyears/StatementResponse.java +++ /dev/null @@ -1,73 +0,0 @@ -package ao.creativemode.kixi.dto.schoolyears; - -import java.time.LocalDateTime; - -public class StatementResponse { - - private Long id; - private String examType; - private Integer durationMinutes; - private String variant; - private String title; - private String instructions; - private Integer totalMaxScore; - private Long schoolYearId; - private Long termId; - private Long subjectId; - private Long classId; - private Long courseId; - private Boolean visible; - private LocalDateTime createdAt; - private LocalDateTime updatedAt; - - - public StatementResponse() { - - } - - - public Long getId() { return id; } - public void setId(Long id) { this.id = id; } - - public String getExamType() { return examType; } - public void setExamType(String examType) { this.examType = examType; } - - public Integer getDurationMinutes() { return durationMinutes; } - public void setDurationMinutes(Integer durationMinutes) { this.durationMinutes = durationMinutes; } - - public String getVariant() { return variant; } - public void setVariant(String variant) { this.variant = variant; } - - public String getTitle() { return title; } - public void setTitle(String title) { this.title = title; } - - public String getInstructions() { return instructions; } - public void setInstructions(String instructions) { this.instructions = instructions; } - - public Integer getTotalMaxScore() { return totalMaxScore; } - public void setTotalMaxScore(Integer totalMaxScore) { this.totalMaxScore = totalMaxScore; } - - 
public Long getSchoolYearId() { return schoolYearId; } - public void setSchoolYearId(Long schoolYearId) { this.schoolYearId = schoolYearId; } - - public Long getTermId() { return termId; } - public void setTermId(Long termId) { this.termId = termId; } - - public Long getSubjectId() { return subjectId; } - public void setSubjectId(Long subjectId) { this.subjectId = subjectId; } - - public Long getClassId() { return classId; } - public void setClassId(Long classId) { this.classId = classId; } - - public Long getCourseId() { return courseId; } - public void setCourseId(Long courseId) { this.courseId = courseId; } - - public Boolean getVisible() { return visible; } - public void setVisible(Boolean visible) { this.visible = visible; } - - public LocalDateTime getCreatedAt() { return createdAt; } - public void setCreatedAt(LocalDateTime createdAt) { this.createdAt = createdAt; } - - public LocalDateTime getUpdatedAt() { return updatedAt; } - public void setUpdatedAt(LocalDateTime updatedAt) { this.updatedAt = updatedAt; } -} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulation/SimulationRequest.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulation/SimulationRequest.java new file mode 100644 index 0000000..c14431a --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulation/SimulationRequest.java @@ -0,0 +1,25 @@ +package ao.creativemode.kixi.dto.simulation; + +import java.time.LocalDateTime; + +import ao.creativemode.kixi.model.SimulationStatus; +import jakarta.validation.constraints.NotNull; + +public record SimulationRequest( + @NotNull(message = "Account ID is required") + Long accountId, + + Long statementId, + + Long schoolYearId, + + LocalDateTime startedAt, + + LocalDateTime finishedAt, + + Integer timeSpentSeconds, + + Double finalScore, + + SimulationStatus status +) {} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulation/SimulationResponse.java 
b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulation/SimulationResponse.java new file mode 100644 index 0000000..5586c36 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulation/SimulationResponse.java @@ -0,0 +1,24 @@ +package ao.creativemode.kixi.dto.simulation; + +import ao.creativemode.kixi.dto.accounts.AccountBasicResponse; +import ao.creativemode.kixi.dto.schoolyears.SchoolYearResponse; +import ao.creativemode.kixi.dto.statement.StatementBasicResponse; +import ao.creativemode.kixi.model.SimulationStatus; + +import java.time.LocalDateTime; + +public record SimulationResponse( + Long id, + AccountBasicResponse account, + StatementBasicResponse statement, + SchoolYearResponse schoolYear, + LocalDateTime startedAt, + LocalDateTime finishedAt, + Integer timeSpentSeconds, + Double finalScore, + SimulationStatus status, + LocalDateTime createdAt, + LocalDateTime updatedAt, + LocalDateTime deletedAt +) { +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulationanswer/SimulationAnswerRequest.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulationanswer/SimulationAnswerRequest.java new file mode 100644 index 0000000..385c199 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulationanswer/SimulationAnswerRequest.java @@ -0,0 +1,16 @@ +package ao.creativemode.kixi.dto.simulationanswer; + +import jakarta.validation.constraints.NotNull; +import java.time.LocalDateTime; + +public record SimulationAnswerRequest( + @NotNull(message = "Simulation ID is required") Long simulationId, + + @NotNull(message = "Question ID is required") Long questionId, + + Long selectedOptionId, + + String answerText, + + LocalDateTime answeredAt +) {} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulationanswer/SimulationAnswerResponse.java 
b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulationanswer/SimulationAnswerResponse.java new file mode 100644 index 0000000..04b88d6 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/simulationanswer/SimulationAnswerResponse.java @@ -0,0 +1,17 @@ +package ao.creativemode.kixi.dto.simulationanswer; + +import java.time.LocalDateTime; + +public record SimulationAnswerResponse( + Long id, + Long simulationId, + Long questionId, + Long selectedOptionId, + String answerText, + Float scoreObtained, + Boolean isCorrect, + LocalDateTime answeredAt, + LocalDateTime createdAt, + LocalDateTime updatedAt, + LocalDateTime deletedAt +) {} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementBasicResponse.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementBasicResponse.java new file mode 100644 index 0000000..4e80607 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementBasicResponse.java @@ -0,0 +1,10 @@ +package ao.creativemode.kixi.dto.statement; + +public record StatementBasicResponse( + Long id, + String examType, + String variant, + String title, + Integer durationMinutes, + Double totalMaxScore +) {} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementRequest.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementRequest.java new file mode 100644 index 0000000..cfbdbbe --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementRequest.java @@ -0,0 +1,40 @@ +package ao.creativemode.kixi.dto.statement; + +import jakarta.validation.constraints.*; + +public record StatementRequest( + @NotBlank(message = "O título é obrigatório") + @Size(min = 3, max = 255, message = "O título deve ter entre 3 e 255 caracteres") + String title, + + @NotBlank(message = "O tipo de exame é obrigatório") + String examType, + 
+ @Positive(message = "A duração deve ser maior que zero") + Integer durationMinutes, + + @Size(max = 50, message = "A variante deve ter no máximo 50 caracteres") + String variant, + + @Size(max = 5000, message = "As instruções devem ter no máximo 5000 caracteres") + String instructions, + + @PositiveOrZero(message = "A pontuação máxima não pode ser negativa") + Integer totalMaxScore, + + @NotNull(message = "O ano letivo é obrigatório") + Long schoolYearId, + + @NotNull(message = "O trimestre é obrigatório") + Long termId, + + @NotNull(message = "A disciplina é obrigatória") + Long subjectId, + + @NotNull(message = "A turma é obrigatória") + Long classId, + + Long courseId, + + Boolean visible +) {} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementResponse.java b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementResponse.java new file mode 100644 index 0000000..8002c11 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/dto/statement/StatementResponse.java @@ -0,0 +1,29 @@ +package ao.creativemode.kixi.dto.statement; + +import java.time.LocalDateTime; + +import ao.creativemode.kixi.dto.accounts.AccountBasicResponse; +import ao.creativemode.kixi.dto.classe.ClassResponse; +import ao.creativemode.kixi.dto.courses.CourseResponse; +import ao.creativemode.kixi.dto.schoolyears.SchoolYearResponse; +import ao.creativemode.kixi.dto.subject.SubjectResponse; +import ao.creativemode.kixi.dto.term.TermResponse; + +public record StatementResponse( + Long id, + String examType, + Integer durationMinutes, + String variant, + String title, + String instructions, + Integer totalMaxScore, + SchoolYearResponse schoolYear, + TermResponse term, + SubjectResponse subject, + ClassResponse classInfo, + CourseResponse course, + AccountBasicResponse createdBy, + Boolean visible, + LocalDateTime createdAt, + LocalDateTime updatedAt +) {} diff --git 
a/services/backend-api/src/main/java/ao/creativemode/kixi/model/Class.java b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Class.java index da58a0d..d1940f4 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/model/Class.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Class.java @@ -1,52 +1,52 @@ -package ao.creativemode.kixi.model; - -import lombok.Data; -import org.springframework.data.annotation.CreatedDate; -import org.springframework.data.annotation.Id; -import org.springframework.data.annotation.LastModifiedDate; -import org.springframework.data.relational.core.mapping.Column; -import org.springframework.data.relational.core.mapping.Table; - -import java.time.LocalDateTime; - -@Data -@Table("classes") -public class Class { - @Id - private Long id; - @Column("code") - private String code; - - @Column("grade") - private String grade; - - @Column("course_id") - private Long courseId; - - @Column("school_year_id") - private Long schoolYearId; - - @CreatedDate - @Column("created_at") - private LocalDateTime createdAt; - - @LastModifiedDate - @Column("updated_at") - private LocalDateTime updatedAt; - - @Column("deleted_at") - private LocalDateTime deletedAt; - - public void markAsDeleted() { - this.deletedAt = LocalDateTime.now(); - } - - public void restore() { - this.deletedAt = null; - } - - public boolean isDeleted() { - return deletedAt != null; - } - -} +package ao.creativemode.kixi.model; + +import java.time.LocalDateTime; +import lombok.Data; +import org.springframework.data.annotation.CreatedDate; +import org.springframework.data.annotation.Id; +import org.springframework.data.annotation.LastModifiedDate; +import org.springframework.data.relational.core.mapping.Column; +import org.springframework.data.relational.core.mapping.Table; + +@Data +@Table("classes") +public class Class { + + @Id + private Long id; + + @Column("code") + private String code; + + @Column("grade") + private Integer grade; + + 
@Column("course_id") + private Long courseId; + + @Column("school_year_id") + private Long schoolYearId; + + @CreatedDate + @Column("created_at") + private LocalDateTime createdAt; + + @LastModifiedDate + @Column("updated_at") + private LocalDateTime updatedAt; + + @Column("deleted_at") + private LocalDateTime deletedAt; + + public void markAsDeleted() { + this.deletedAt = LocalDateTime.now(); + } + + public void restore() { + this.deletedAt = null; + } + + public boolean isDeleted() { + return deletedAt != null; + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/model/Course.java b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Course.java index 2ab9b0c..9e3e4e0 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/model/Course.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Course.java @@ -1,5 +1,6 @@ package ao.creativemode.kixi.model; +import java.time.LocalDateTime; import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; @@ -9,8 +10,6 @@ import org.springframework.data.relational.core.mapping.Column; import org.springframework.data.relational.core.mapping.Table; -import java.time.LocalDateTime; - @Data @NoArgsConstructor @AllArgsConstructor diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/model/Question.java b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Question.java new file mode 100644 index 0000000..5869e95 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Question.java @@ -0,0 +1,299 @@ +package ao.creativemode.kixi.model; + +import org.springframework.data.annotation.Id; +import org.springframework.data.annotation.CreatedDate; +import org.springframework.data.annotation.LastModifiedDate; +import org.springframework.data.relational.core.mapping.Column; +import org.springframework.data.relational.core.mapping.Table; + +import java.time.LocalDateTime; + +/** + * Question entity 
representing a single question within an exam statement. + * + * Each question belongs to a Statement and can have multiple QuestionOptions + * if it's a multiple choice question. + * + * Question types: + * - multiple_choice: Has options with one or more correct answers + * - short_answer: Expects a brief text/numeric answer + * - development: Requires an extended written response + * - true_false: Binary true/false answer + */ +@Table("questions") +public class Question { + + @Id + private Long id; + + /** + * Reference to the parent statement + */ + @Column("statement_id") + private Long statementId; + + /** + * Question number within the exam (e.g., 1, 2, 3...) + */ + @Column("number") + private Integer number; + + /** + * The actual question text + */ + @Column("text") + private String text; + + /** + * Type of question: multiple_choice, short_answer, development, true_false + */ + @Column("question_type") + private String questionType; + + /** + * Maximum score/points for this question + */ + @Column("max_score") + private Double maxScore; + + /** + * Order index for display (allows custom ordering independent of question number) + */ + @Column("order_index") + private Integer orderIndex; + + /** + * OCR confidence score for this question (0.0 - 1.0) + */ + @Column("ocr_confidence") + private Double ocrConfidence; + + /** + * Page index where this question was found (for multi-page documents) + */ + @Column("page_index") + private Integer pageIndex; + + /** + * Flag indicating if this question needs human review + */ + @Column("needs_review") + private Boolean needsReview; + + @CreatedDate + @Column("created_at") + private LocalDateTime createdAt; + + @LastModifiedDate + @Column("updated_at") + private LocalDateTime updatedAt; + + @Column("deleted_at") + private LocalDateTime deletedAt; + + // Constructors + + public Question() { + this.needsReview = false; + } + + public Question(Long statementId, Integer number, String text, String questionType) { + this(); 
+ this.statementId = statementId; + this.number = number; + this.text = text; + this.questionType = questionType; + this.orderIndex = number; + } + + // Getters and Setters + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public Long getStatementId() { + return statementId; + } + + public void setStatementId(Long statementId) { + this.statementId = statementId; + } + + public Integer getNumber() { + return number; + } + + public void setNumber(Integer number) { + this.number = number; + } + + public String getText() { + return text; + } + + public void setText(String text) { + this.text = text; + } + + public String getQuestionType() { + return questionType; + } + + public void setQuestionType(String questionType) { + this.questionType = questionType; + } + + public Double getMaxScore() { + return maxScore; + } + + public void setMaxScore(Double maxScore) { + this.maxScore = maxScore; + } + + public Integer getOrderIndex() { + return orderIndex; + } + + public void setOrderIndex(Integer orderIndex) { + this.orderIndex = orderIndex; + } + + public Double getOcrConfidence() { + return ocrConfidence; + } + + public void setOcrConfidence(Double ocrConfidence) { + this.ocrConfidence = ocrConfidence; + } + + public Integer getPageIndex() { + return pageIndex; + } + + public void setPageIndex(Integer pageIndex) { + this.pageIndex = pageIndex; + } + + public Boolean getNeedsReview() { + return needsReview; + } + + public void setNeedsReview(Boolean needsReview) { + this.needsReview = needsReview; + } + + public LocalDateTime getCreatedAt() { + return createdAt; + } + + public void setCreatedAt(LocalDateTime createdAt) { + this.createdAt = createdAt; + } + + public LocalDateTime getUpdatedAt() { + return updatedAt; + } + + public void setUpdatedAt(LocalDateTime updatedAt) { + this.updatedAt = updatedAt; + } + + public LocalDateTime getDeletedAt() { + return deletedAt; + } + + public void setDeletedAt(LocalDateTime 
deletedAt) { + this.deletedAt = deletedAt; + } + + // Utility methods + + /** + * Mark this question as deleted (soft delete) + */ + public void markAsDeleted() { + this.deletedAt = LocalDateTime.now(); + } + + /** + * Restore a soft-deleted question + */ + public void restore() { + this.deletedAt = null; + } + + /** + * Check if this question is deleted + */ + public boolean isDeleted() { + return deletedAt != null; + } + + /** + * Check if this is a multiple choice question + */ + public boolean isMultipleChoice() { + return "multiple_choice".equals(questionType); + } + + /** + * Check if this is a short answer question + */ + public boolean isShortAnswer() { + return "short_answer".equals(questionType); + } + + /** + * Check if this is a development/essay question + */ + public boolean isDevelopment() { + return "development".equals(questionType); + } + + /** + * Check if this is a true/false question + */ + public boolean isTrueFalse() { + return "true_false".equals(questionType); + } + + /** + * Mark this question for human review + */ + public void markForReview() { + this.needsReview = true; + } + + /** + * Mark this question as reviewed and approved + */ + public void approveReview() { + this.needsReview = false; + } + + /** + * Check if this question has low OCR confidence + */ + public boolean hasLowConfidence(double threshold) { + return ocrConfidence != null && ocrConfidence < threshold; + } + + @Override + public String toString() { + return "Question{" + + "id=" + id + + ", statementId=" + statementId + + ", number=" + number + + ", questionType='" + questionType + '\'' + + ", maxScore=" + maxScore + + ", ocrConfidence=" + ocrConfidence + + ", needsReview=" + needsReview + + '}'; + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/model/QuestionImage.java b/services/backend-api/src/main/java/ao/creativemode/kixi/model/QuestionImage.java new file mode 100644 index 0000000..c73a61a --- /dev/null +++ 
b/services/backend-api/src/main/java/ao/creativemode/kixi/model/QuestionImage.java @@ -0,0 +1,55 @@ +package ao.creativemode.kixi.model; + +import org.springframework.data.annotation.Id; +import org.springframework.data.annotation.CreatedDate; +import org.springframework.data.annotation.LastModifiedDate; +import org.springframework.data.relational.core.mapping.Column; +import org.springframework.data.relational.core.mapping.Table; +import lombok.Data; +import java.time.LocalDateTime; + +@Data +@Table("question_images") +public class QuestionImage { + + @Id + private Long id; + + @Column("question_id") + private Long questionId; + + @Column("image_url") + private String imageUrl; + + @Column("caption") + private String caption; + + @Column("order_index") + private Integer orderIndex; + + @CreatedDate + @Column("created_at") + private LocalDateTime createdAt; + + @LastModifiedDate + @Column("updated_at") + private LocalDateTime updatedAt; + + @Column("deleted_at") + private LocalDateTime deletedAt; + + public QuestionImage() { + } + + public void markAsDeleted() { + this.deletedAt = LocalDateTime.now(); + } + + public void restore() { + this.deletedAt = null; + } + + public boolean isDeleted() { + return deletedAt != null; + } +} \ No newline at end of file diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/model/QuestionOption.java b/services/backend-api/src/main/java/ao/creativemode/kixi/model/QuestionOption.java new file mode 100644 index 0000000..b0ec11d --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/model/QuestionOption.java @@ -0,0 +1,234 @@ +package ao.creativemode.kixi.model; + +import org.springframework.data.annotation.Id; +import org.springframework.data.annotation.CreatedDate; +import org.springframework.data.annotation.LastModifiedDate; +import org.springframework.data.relational.core.mapping.Column; +import org.springframework.data.relational.core.mapping.Table; + +import java.time.LocalDateTime; + +/** + * 
QuestionOption entity representing an option for a multiple choice question. + * + * Each option belongs to a Question and contains the option label (e.g., A, B, C, D), + * the option text, and whether it is the correct answer. + */ +@Table("question_options") +public class QuestionOption { + + @Id + private Long id; + + /** + * Reference to the parent question + */ + @Column("question_id") + private Long questionId; + + /** + * Option label (e.g., "A", "B", "C", "D", or "1", "2", "3", "4") + */ + @Column("option_label") + private String optionLabel; + + /** + * The text content of this option + */ + @Column("option_text") + private String optionText; + + /** + * Whether this option is the correct answer + */ + @Column("is_correct") + private Boolean isCorrect; + + /** + * Order index for display (allows custom ordering) + */ + @Column("order_index") + private Integer orderIndex; + + /** + * OCR confidence score for this option (0.0 - 1.0) + */ + @Column("ocr_confidence") + private Double ocrConfidence; + + @CreatedDate + @Column("created_at") + private LocalDateTime createdAt; + + @LastModifiedDate + @Column("updated_at") + private LocalDateTime updatedAt; + + @Column("deleted_at") + private LocalDateTime deletedAt; + + // Constructors + + public QuestionOption() { + this.isCorrect = false; + } + + public QuestionOption(Long questionId, String optionLabel, String optionText) { + this(); + this.questionId = questionId; + this.optionLabel = optionLabel; + this.optionText = optionText; + } + + public QuestionOption(Long questionId, String optionLabel, String optionText, Boolean isCorrect) { + this(questionId, optionLabel, optionText); + this.isCorrect = isCorrect; + } + + // Getters and Setters + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public Long getQuestionId() { + return questionId; + } + + public void setQuestionId(Long questionId) { + this.questionId = questionId; + } + + public String getOptionLabel() { 
+ return optionLabel; + } + + public void setOptionLabel(String optionLabel) { + this.optionLabel = optionLabel; + } + + public String getOptionText() { + return optionText; + } + + public void setOptionText(String optionText) { + this.optionText = optionText; + } + + public Boolean getIsCorrect() { + return isCorrect; + } + + public void setIsCorrect(Boolean isCorrect) { + this.isCorrect = isCorrect; + } + + public Integer getOrderIndex() { + return orderIndex; + } + + public void setOrderIndex(Integer orderIndex) { + this.orderIndex = orderIndex; + } + + public Double getOcrConfidence() { + return ocrConfidence; + } + + public void setOcrConfidence(Double ocrConfidence) { + this.ocrConfidence = ocrConfidence; + } + + public LocalDateTime getCreatedAt() { + return createdAt; + } + + public void setCreatedAt(LocalDateTime createdAt) { + this.createdAt = createdAt; + } + + public LocalDateTime getUpdatedAt() { + return updatedAt; + } + + public void setUpdatedAt(LocalDateTime updatedAt) { + this.updatedAt = updatedAt; + } + + public LocalDateTime getDeletedAt() { + return deletedAt; + } + + public void setDeletedAt(LocalDateTime deletedAt) { + this.deletedAt = deletedAt; + } + + // Utility methods + + /** + * Mark this option as deleted (soft delete) + */ + public void markAsDeleted() { + this.deletedAt = LocalDateTime.now(); + } + + /** + * Restore a soft-deleted option + */ + public void restore() { + this.deletedAt = null; + } + + /** + * Check if this option is deleted + */ + public boolean isDeleted() { + return deletedAt != null; + } + + /** + * Mark this option as the correct answer + */ + public void markAsCorrect() { + this.isCorrect = true; + } + + /** + * Mark this option as incorrect + */ + public void markAsIncorrect() { + this.isCorrect = false; + } + + /** + * Check if this option has low OCR confidence + */ + public boolean hasLowConfidence(double threshold) { + return ocrConfidence != null && ocrConfidence < threshold; + } + + /** + * Get a 
normalized option label (uppercase, trimmed) + */ + public String getNormalizedLabel() { + return optionLabel != null ? optionLabel.toUpperCase().trim() : null; + } + + @Override + public String toString() { + return "QuestionOption{" + + "id=" + id + + ", questionId=" + questionId + + ", optionLabel='" + optionLabel + '\'' + + ", optionText='" + (optionText != null && optionText.length() > 50 + ? optionText.substring(0, 50) + "..." + : optionText) + '\'' + + ", isCorrect=" + isCorrect + + ", ocrConfidence=" + ocrConfidence + + '}'; + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/model/Simulation.java b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Simulation.java new file mode 100644 index 0000000..59c974c --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Simulation.java @@ -0,0 +1,155 @@ +package ao.creativemode.kixi.model; + +import java.time.LocalDateTime; + +import org.springframework.data.annotation.Id; +import org.springframework.data.relational.core.mapping.Column; +import org.springframework.data.relational.core.mapping.Table; + +@Table("simulation") +public class Simulation { + + @Id + private Long id; + + @Column("account_id") + private Long accountId; + + @Column("statement_id") + private Long statementId; + + @Column("school_year_id") + private Long schoolYearId; + + @Column("started_at") + private LocalDateTime startedAt; + + @Column("finished_at") + private LocalDateTime finishedAt; + + @Column("time_spent_seconds") + private Integer timeSpentSeconds; + + @Column("final_score") + private Double finalScore; + + @Column("status") + private SimulationStatus status; + + @Column("created_at") + private LocalDateTime createdAt; + + @Column("updated_at") + private LocalDateTime updatedAt; + + @Column("deleted_at") + private LocalDateTime deletedAt; + + public Simulation() { + this.createdAt = LocalDateTime.now(); + } + + public Long getId() { + return id; + } + + public void 
setId(Long id) { + this.id = id; + } + + public Long getAccountId() { + return accountId; + } + + public void setAccountId(Long accountId) { + this.accountId = accountId; + } + + public Long getStatementId() { + return statementId; + } + + public void setStatementId(Long statementId) { + this.statementId = statementId; + } + + public Long getSchoolYearId() { + return schoolYearId; + } + + public void setSchoolYearId(Long schoolYearId) { + this.schoolYearId = schoolYearId; + } + + public LocalDateTime getStartedAt() { + return startedAt; + } + + public void setStartedAt(LocalDateTime startedAt) { + this.startedAt = startedAt; + } + + public LocalDateTime getFinishedAt() { + return finishedAt; + } + + public void setFinishedAt(LocalDateTime finishedAt) { + this.finishedAt = finishedAt; + } + + public Integer getTimeSpentSeconds() { + return timeSpentSeconds; + } + + public void setTimeSpentSeconds(Integer timeSpentSeconds) { + this.timeSpentSeconds = timeSpentSeconds; + } + + public Double getFinalScore() { + return finalScore; + } + + public void setFinalScore(Double finalScore) { + this.finalScore = finalScore; + } + + public SimulationStatus getStatus() { + return status; + } + + public void setStatus(SimulationStatus status) { + this.status = status; + } + + public LocalDateTime getCreatedAt() { + return createdAt; + } + + public void setCreatedAt(LocalDateTime createdAt) { + this.createdAt = createdAt; + } + + public LocalDateTime getUpdatedAt() { + return updatedAt; + } + + public void setUpdatedAt(LocalDateTime updatedAt) { + this.updatedAt = updatedAt; + } + + public LocalDateTime getDeletedAt() { + return deletedAt; + } + + public void setDeletedAt(LocalDateTime deletedAt) { + this.deletedAt = deletedAt; + } + + public void markAsDelete() { + this.deletedAt = LocalDateTime.now(); + } + + public void restore() { + this.deletedAt = null; + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/model/SimulationAnswer.java 
b/services/backend-api/src/main/java/ao/creativemode/kixi/model/SimulationAnswer.java new file mode 100644 index 0000000..ff73f67 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/model/SimulationAnswer.java @@ -0,0 +1,59 @@ +package ao.creativemode.kixi.model; + +import java.time.LocalDateTime; +import lombok.Getter; +import lombok.Setter; +import org.springframework.data.annotation.Id; +import org.springframework.data.relational.core.mapping.Column; +import org.springframework.data.relational.core.mapping.Table; + +@Getter +@Setter +@Table("simulation_answers") +public class SimulationAnswer { + + @Id + private Long id; + + @Column("simulation_id") + private Long simulationId; + + @Column("question_id") + private Long questionId; + + @Column("selected_option_id") + private Long selectedOptionId; + + @Column("answer_text") + private String answerText; + + @Column("score_obtained") + private Float scoreObtained; + + @Column("is_correct") + private Boolean isCorrect; + + @Column("answered_at") + private LocalDateTime answeredAt; + + @Column("created_at") + private LocalDateTime createdAt; + + @Column("updated_at") + private LocalDateTime updatedAt; + + @Column("deleted_at") + private LocalDateTime deletedAt; + + public void markAsDeleted() { + this.deletedAt = LocalDateTime.now(); + } + + public void restore() { + this.deletedAt = null; + } + + public boolean isDeleted() { + return deletedAt != null; + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/model/SimulationStatus.java b/services/backend-api/src/main/java/ao/creativemode/kixi/model/SimulationStatus.java new file mode 100644 index 0000000..4926b1b --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/model/SimulationStatus.java @@ -0,0 +1,7 @@ +package ao.creativemode.kixi.model; + +public enum SimulationStatus { + IN_PROGRESS, + FINISHED, + CANCELLED +} diff --git 
a/services/backend-api/src/main/java/ao/creativemode/kixi/model/Statement.java b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Statement.java index eb7356e..9f0caae 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/model/Statement.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/model/Statement.java @@ -1,66 +1,392 @@ package ao.creativemode.kixi.model; -import lombok.Data; -import org.springframework.data.annotation.CreatedDate; import org.springframework.data.annotation.Id; +import org.springframework.data.annotation.CreatedDate; import org.springframework.data.annotation.LastModifiedDate; import org.springframework.data.relational.core.mapping.Column; import org.springframework.data.relational.core.mapping.Table; + import java.time.LocalDateTime; -@Data -@Table("statement") +/** + * Statement entity representing an exam paper/test in the system. + * + * This entity stores metadata about exam papers including their type, + * duration, variant, and associated references to school year, term, + * subject, and class. + * + * The actual questions are stored in a separate Question entity with + * a foreign key reference to this statement. 
+ */ +@Table("statements") public class Statement { + @Id private Long id; + /** + * Type of examination (e.g., "Avaliação Periódica", "Exame Final", "Teste Sumativo") + */ @Column("exam_type") private String examType; + /** + * Duration of the exam in minutes + */ @Column("duration_minutes") private Integer durationMinutes; + /** + * Exam variant (e.g., "A", "B", "C") + */ @Column("variant") private String variant; + /** + * Title of the exam/statement + */ @Column("title") private String title; + /** + * Instructions for the exam + */ @Column("instructions") private String instructions; + /** + * Total maximum score for the entire exam + */ @Column("total_max_score") - private Integer totalMaxScore; + private Double totalMaxScore; + /** + * Reference to the school year + */ @Column("school_year_id") private Long schoolYearId; + /** + * Reference to the term/trimester + */ @Column("term_id") private Long termId; + /** + * Reference to the subject + */ @Column("subject_id") private Long subjectId; + /** + * Reference to the class + */ @Column("class_id") private Long classId; + /** + * Reference to the course (optional) + */ @Column("course_id") private Long courseId; - @Column("create_by") + /** + * Reference to the user who created this statement + */ + @Column("created_by") private Long createdBy; + /** + * Visibility flag (true if the statement is visible to students) + */ @Column("visible") private Boolean visible; + /** + * Flag indicating if this statement needs human review (e.g., low OCR confidence) + */ + @Column("needs_review") + private Boolean needsReview; + + /** + * OCR confidence score (0.0 - 1.0) + */ + @Column("ocr_confidence") + private Double ocrConfidence; + + /** + * Original OCR request ID for tracking + */ + @Column("ocr_request_id") + private String ocrRequestId; + + /** + * Source of the statement (e.g., "manual", "ocr", "import") + */ + @Column("source") + private String source; + @CreatedDate - @Column("create_at") + 
@Column("created_at") private LocalDateTime createdAt; @LastModifiedDate - @Column("update_at") + @Column("updated_at") private LocalDateTime updatedAt; - @Column("delete_at") + @Column("deleted_at") private LocalDateTime deletedAt; + + // Constructors + + public Statement() { + this.visible = false; + this.needsReview = false; + this.source = "manual"; + } + + public Statement(String examType, String title) { + this(); + this.examType = examType; + this.title = title; + } + + // Getters and Setters + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public String getExamType() { + return examType; + } + + public void setExamType(String examType) { + this.examType = examType; + } + + public Integer getDurationMinutes() { + return durationMinutes; + } + + public void setDurationMinutes(Integer durationMinutes) { + this.durationMinutes = durationMinutes; + } + + public String getVariant() { + return variant; + } + + public void setVariant(String variant) { + this.variant = variant; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getInstructions() { + return instructions; + } + + public void setInstructions(String instructions) { + this.instructions = instructions; + } + + public Double getTotalMaxScore() { + return totalMaxScore; + } + + public void setTotalMaxScore(Double totalMaxScore) { + this.totalMaxScore = totalMaxScore; + } + + public Long getSchoolYearId() { + return schoolYearId; + } + + public void setSchoolYearId(Long schoolYearId) { + this.schoolYearId = schoolYearId; + } + + public Long getTermId() { + return termId; + } + + public void setTermId(Long termId) { + this.termId = termId; + } + + public Long getSubjectId() { + return subjectId; + } + + public void setSubjectId(Long subjectId) { + this.subjectId = subjectId; + } + + public Long getClassId() { + return classId; + } + + public void setClassId(Long classId) 
{ + this.classId = classId; + } + + public Long getCourseId() { + return courseId; + } + + public void setCourseId(Long courseId) { + this.courseId = courseId; + } + + public Long getCreatedBy() { + return createdBy; + } + + public void setCreatedBy(Long createdBy) { + this.createdBy = createdBy; + } + + public Boolean getVisible() { + return visible; + } + + public void setVisible(Boolean visible) { + this.visible = visible; + } + + public Boolean getNeedsReview() { + return needsReview; + } + + public void setNeedsReview(Boolean needsReview) { + this.needsReview = needsReview; + } + + public Double getOcrConfidence() { + return ocrConfidence; + } + + public void setOcrConfidence(Double ocrConfidence) { + this.ocrConfidence = ocrConfidence; + } + + public String getOcrRequestId() { + return ocrRequestId; + } + + public void setOcrRequestId(String ocrRequestId) { + this.ocrRequestId = ocrRequestId; + } + + public String getSource() { + return source; + } + + public void setSource(String source) { + this.source = source; + } + + public LocalDateTime getCreatedAt() { + return createdAt; + } + + public void setCreatedAt(LocalDateTime createdAt) { + this.createdAt = createdAt; + } + + public LocalDateTime getUpdatedAt() { + return updatedAt; + } + + public void setUpdatedAt(LocalDateTime updatedAt) { + this.updatedAt = updatedAt; + } + + public LocalDateTime getDeletedAt() { + return deletedAt; + } + + public void setDeletedAt(LocalDateTime deletedAt) { + this.deletedAt = deletedAt; + } + + // Utility methods + + /** + * Mark this statement as deleted (soft delete) + */ + public void markAsDeleted() { + this.deletedAt = LocalDateTime.now(); + } + + /** + * Restore a soft-deleted statement + */ + public void restore() { + this.deletedAt = null; + } + + /** + * Check if this statement is deleted + */ + public boolean isDeleted() { + return deletedAt != null; + } + + /** + * Check if this statement was created via OCR + */ + public boolean isFromOcr() { + return 
"ocr".equals(source); + } + + /** + * Mark this statement as needing review (e.g., low OCR confidence) + */ + public void markForReview() { + this.needsReview = true; + } + + /** + * Mark this statement as reviewed and approved + */ + public void approveReview() { + this.needsReview = false; + } + + /** + * Set OCR-related metadata + */ + public void setOcrMetadata(String requestId, Double confidence, boolean needsReview) { + this.ocrRequestId = requestId; + this.ocrConfidence = confidence; + this.needsReview = needsReview; + this.source = "ocr"; + } + + @Override + public String toString() { + return "Statement{" + + "id=" + id + + ", examType='" + examType + '\'' + + ", title='" + title + '\'' + + ", variant='" + variant + '\'' + + ", schoolYearId=" + schoolYearId + + ", subjectId=" + subjectId + + ", visible=" + visible + + ", needsReview=" + needsReview + + ", source='" + source + '\'' + + '}'; + } } diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/AccountRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/AccountRepository.java index a979ec0..049eaeb 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/AccountRepository.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/AccountRepository.java @@ -19,5 +19,7 @@ public interface AccountRepository extends ReactiveCrudRepository Mono findByUsernameAndDeletedAtIsNull(String username); + Mono findByEmailAndDeletedAtIsNull(String email); + Mono findByUsernameAndIdNotAndDeletedAtIsNull(String username, Long id); } diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/ClassRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/ClassRepository.java index 060419e..092d40d 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/ClassRepository.java +++ 
b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/ClassRepository.java @@ -1,15 +1,48 @@ -package ao.creativemode.kixi.repository; - - -import org.springframework.data.repository.reactive.ReactiveCrudRepository; -import ao.creativemode.kixi.model.Class; -import reactor.core.publisher.Flux; -import reactor.core.publisher.Mono; - -public interface ClassRepository extends ReactiveCrudRepository { - - Flux findAllByDeletedAtIsNull(); - Flux findAllByDeletedAtIsNotNull(); - Mono findByIdAndDeletedAtIsNull(Long id); - Mono findByIdAndDeletedAtIsNotNull(Long id); -} +package ao.creativemode.kixi.repository; + +import ao.creativemode.kixi.model.Class; +import org.springframework.data.repository.reactive.ReactiveCrudRepository; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +public interface ClassRepository extends ReactiveCrudRepository { + Flux findAllByDeletedAtIsNull(); + + Flux findAllByDeletedAtIsNotNull(); + + Mono findByIdAndDeletedAtIsNull(Long id); + + Mono findByIdAndDeletedAtIsNotNull(Long id); + + /** + * Find a class by grade, course and school year + */ + Mono findByGradeAndCourseIdAndSchoolYearIdAndDeletedAtIsNull( + Integer grade, + Long courseId, + Long schoolYearId + ); + + /** + * Find a class by grade and school year (without course) + */ + Mono findByGradeAndSchoolYearIdAndDeletedAtIsNull( + Integer grade, + Long schoolYearId + ); + + /** + * Find classes by grade + */ + Flux findByGradeAndDeletedAtIsNull(Integer grade); + + /** + * Find classes by course + */ + Flux findByCourseIdAndDeletedAtIsNull(Long courseId); + + /** + * Find classes by school year + */ + Flux findBySchoolYearIdAndDeletedAtIsNull(Long schoolYearId); +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/CourseRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/CourseRepository.java index 5b4fc11..0d70b86 100644 --- 
a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/CourseRepository.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/CourseRepository.java @@ -6,7 +6,6 @@ import reactor.core.publisher.Mono; public interface CourseRepository extends ReactiveCrudRepository { - Mono findByIdAndDeletedAtIsNull(Long id); Flux findAllByDeletedAtIsNull(); @@ -18,4 +17,14 @@ public interface CourseRepository extends ReactiveCrudRepository { Mono findByCodeAndDeletedAtIsNull(String code); Mono findByCodeAndIdNotAndDeletedAtIsNull(String code, Long id); + + /** + * Find a course by name (case-insensitive) + */ + Mono findByNameIgnoreCaseAndDeletedAtIsNull(String name); + + /** + * Find courses by name containing (case-insensitive) + */ + Flux findByNameContainingIgnoreCaseAndDeletedAtIsNull(String name); } diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionImageRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionImageRepository.java new file mode 100644 index 0000000..9046b10 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionImageRepository.java @@ -0,0 +1,24 @@ +package ao.creativemode.kixi.repository; + + +import org.springframework.data.repository.reactive.ReactiveCrudRepository; + +import ao.creativemode.kixi.model.QuestionImage; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +public interface QuestionImageRepository extends ReactiveCrudRepository { + + Flux findAllByDeletedAtIsNull(); + + Flux findAllByDeletedAtIsNotNull(); + + Mono findByIdAndDeletedAtIsNull(Long id); + + Mono findByIdAndDeletedAtIsNotNull(Long id); + + /** + * Busca todas as imagens associadas a uma questão específica que não foram deletadas. 
+ */ + Flux findByQuestionIdAndDeletedAtIsNullOrderByOrderIndexAsc(Long questionId); +} \ No newline at end of file diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionOptionRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionOptionRepository.java new file mode 100644 index 0000000..465fa72 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionOptionRepository.java @@ -0,0 +1,158 @@ +package ao.creativemode.kixi.repository; + +import ao.creativemode.kixi.model.QuestionOption; + +import org.springframework.data.r2dbc.repository.Query; +import org.springframework.data.r2dbc.repository.R2dbcRepository; +import org.springframework.stereotype.Repository; + +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +/** + * Repository for QuestionOption entity database operations. + * + * Provides reactive CRUD operations and custom queries for + * managing question options (multiple choice answers) in the database. 
+ */ +@Repository +public interface QuestionOptionRepository extends R2dbcRepository { + + /** + * Find all active (non-deleted) options + */ + Flux findAllByDeletedAtIsNull(); + + /** + * Find all options for a specific question + */ + Flux findAllByQuestionIdAndDeletedAtIsNull(Long questionId); + + /** + * Find all options for a question, ordered by label + */ + @Query("SELECT * FROM question_options WHERE question_id = :questionId AND deleted_at IS NULL ORDER BY option_label ASC") + Flux findAllByQuestionIdOrderedByLabel(Long questionId); + + /** + * Find all options for a question, ordered by order_index + */ + @Query("SELECT * FROM question_options WHERE question_id = :questionId AND deleted_at IS NULL ORDER BY order_index ASC") + Flux findAllByQuestionIdOrderedByOrderIndex(Long questionId); + + /** + * Find an active option by ID + */ + Mono findByIdAndDeletedAtIsNull(Long id); + + /** + * Find a deleted option by ID + */ + Mono findByIdAndDeletedAtIsNotNull(Long id); + + /** + * Find an option by question ID and label + */ + Mono findByQuestionIdAndOptionLabelAndDeletedAtIsNull(Long questionId, String optionLabel); + + /** + * Find the correct option(s) for a question + */ + Flux findAllByQuestionIdAndIsCorrectTrueAndDeletedAtIsNull(Long questionId); + + /** + * Find the single correct option for a question (for single-answer questions) + */ + @Query("SELECT * FROM question_options WHERE question_id = :questionId AND is_correct = true AND deleted_at IS NULL LIMIT 1") + Mono findCorrectOptionByQuestionId(Long questionId); + + /** + * Find incorrect options for a question + */ + Flux findAllByQuestionIdAndIsCorrectFalseAndDeletedAtIsNull(Long questionId); + + /** + * Find options with OCR confidence below threshold + */ + @Query("SELECT * FROM question_options WHERE ocr_confidence < :threshold AND deleted_at IS NULL ORDER BY ocr_confidence ASC") + Flux findAllWithLowOcrConfidence(Double threshold); + + /** + * Find options with low OCR confidence for a 
specific question + */ + @Query("SELECT * FROM question_options WHERE question_id = :questionId AND ocr_confidence < :threshold AND deleted_at IS NULL ORDER BY option_label ASC") + Flux findByQuestionIdWithLowOcrConfidence(Long questionId, Double threshold); + + /** + * Count options for a question + */ + Mono countByQuestionIdAndDeletedAtIsNull(Long questionId); + + /** + * Count correct options for a question + */ + Mono countByQuestionIdAndIsCorrectTrueAndDeletedAtIsNull(Long questionId); + + /** + * Check if an option exists by ID and is active + */ + Mono existsByIdAndDeletedAtIsNull(Long id); + + /** + * Check if an option with the same label exists for a question + */ + Mono existsByQuestionIdAndOptionLabelAndDeletedAtIsNull(Long questionId, String optionLabel); + + /** + * Check if a question has a correct option defined + */ + Mono existsByQuestionIdAndIsCorrectTrueAndDeletedAtIsNull(Long questionId); + + /** + * Delete all options for a question (soft delete) + */ + @Query("UPDATE question_options SET deleted_at = CURRENT_TIMESTAMP WHERE question_id = :questionId AND deleted_at IS NULL") + Mono softDeleteAllByQuestionId(Long questionId); + + /** + * Find the next order index for a question + */ + @Query("SELECT COALESCE(MAX(order_index), 0) + 1 FROM question_options WHERE question_id = :questionId AND deleted_at IS NULL") + Mono findNextOrderIndex(Long questionId); + + /** + * Mark all options as incorrect for a question + */ + @Query("UPDATE question_options SET is_correct = false, updated_at = CURRENT_TIMESTAMP WHERE question_id = :questionId AND deleted_at IS NULL") + Mono markAllAsIncorrect(Long questionId); + + /** + * Mark a specific option as correct (and others as incorrect) + */ + @Query("UPDATE question_options SET is_correct = (id = :correctOptionId), updated_at = CURRENT_TIMESTAMP WHERE question_id = :questionId AND deleted_at IS NULL") + Mono setCorrectOption(Long questionId, Long correctOptionId); + + /** + * Search options by text content 
(case-insensitive partial match) + */ + @Query("SELECT * FROM question_options WHERE LOWER(option_text) LIKE LOWER(CONCAT('%', :searchTerm, '%')) AND deleted_at IS NULL ORDER BY question_id, option_label") + Flux searchByText(String searchTerm); + + /** + * Get average OCR confidence for options of a question + */ + @Query("SELECT AVG(ocr_confidence) FROM question_options WHERE question_id = :questionId AND ocr_confidence IS NOT NULL AND deleted_at IS NULL") + Mono getAverageOcrConfidence(Long questionId); + + /** + * Find all options for multiple questions + */ + @Query("SELECT * FROM question_options WHERE question_id IN (:questionIds) AND deleted_at IS NULL ORDER BY question_id, order_index") + Flux findAllByQuestionIds(Iterable questionIds); + + /** + * Bulk delete options for multiple questions + */ + @Query("UPDATE question_options SET deleted_at = CURRENT_TIMESTAMP WHERE question_id IN (:questionIds) AND deleted_at IS NULL") + Mono softDeleteAllByQuestionIds(Iterable questionIds); +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionRepository.java new file mode 100644 index 0000000..fce559d --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/QuestionRepository.java @@ -0,0 +1,218 @@ +package ao.creativemode.kixi.repository; + +import ao.creativemode.kixi.model.Question; +import org.springframework.data.r2dbc.repository.Query; +import org.springframework.data.r2dbc.repository.R2dbcRepository; +import org.springframework.stereotype.Repository; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +/** + * Repository for Question entity database operations. + * + * Provides reactive CRUD operations and custom queries for + * managing exam questions in the database. 
+ */ +@Repository +public interface QuestionRepository extends R2dbcRepository { + /** + * Find all active (non-deleted) questions + */ + Flux findAllByDeletedAtIsNull(); + + /** + * Find all questions for a specific statement + */ + Flux findAllByStatementIdAndDeletedAtIsNull(Long statementId); + + /** + * Find all questions for a statement, ordered by question number + */ + @Query( + "SELECT * FROM questions WHERE statement_id = :statementId AND deleted_at IS NULL ORDER BY number ASC" + ) + Flux findAllByStatementIdOrderedByNumber(Long statementId); + + /** + * Find all questions for a statement, ordered by order_index + */ + @Query( + "SELECT * FROM questions WHERE statement_id = :statementId AND deleted_at IS NULL ORDER BY order_index ASC" + ) + Flux findAllByStatementIdOrderedByOrderIndex(Long statementId); + + /** + * Find all questions for a statement, ordered by order_index (Spring Data naming convention) + */ + Flux findAllByStatementIdOrderByOrderIndex(Long statementId); + + /** + * Find an active question by ID + */ + Mono findByIdAndDeletedAtIsNull(Long id); + + /** + * Find a deleted question by ID + */ + Mono findByIdAndDeletedAtIsNotNull(Long id); + + /** + * Find a question by statement ID and question number + */ + Mono findByStatementIdAndNumberAndDeletedAtIsNull( + Long statementId, + Integer number + ); + + /** + * Find all questions that need review + */ + Flux findAllByNeedsReviewTrueAndDeletedAtIsNull(); + + /** + * Find all questions for a statement that need review + */ + Flux findAllByStatementIdAndNeedsReviewTrueAndDeletedAtIsNull( + Long statementId + ); + + /** + * Find questions by type + */ + Flux findAllByQuestionTypeAndDeletedAtIsNull(String questionType); + + /** + * Find questions by type for a specific statement + */ + Flux findAllByStatementIdAndQuestionTypeAndDeletedAtIsNull( + Long statementId, + String questionType + ); + + /** + * Find multiple choice questions for a statement + */ + @Query( + "SELECT * FROM questions WHERE 
statement_id = :statementId AND question_type = 'multiple_choice' AND deleted_at IS NULL ORDER BY number ASC" + ) + Flux findMultipleChoiceByStatementId(Long statementId); + + /** + * Find questions with OCR confidence below threshold + */ + @Query( + "SELECT * FROM questions WHERE ocr_confidence < :threshold AND deleted_at IS NULL ORDER BY ocr_confidence ASC" + ) + Flux findAllWithLowOcrConfidence(Double threshold); + + /** + * Find questions with low OCR confidence for a specific statement + */ + @Query( + "SELECT * FROM questions WHERE statement_id = :statementId AND ocr_confidence < :threshold AND deleted_at IS NULL ORDER BY number ASC" + ) + Flux findByStatementIdWithLowOcrConfidence( + Long statementId, + Double threshold + ); + + /** + * Find questions on a specific page + */ + Flux findAllByPageIndexAndDeletedAtIsNull(Integer pageIndex); + + /** + * Find questions on a specific page for a statement + */ + Flux findAllByStatementIdAndPageIndexAndDeletedAtIsNull( + Long statementId, + Integer pageIndex + ); + + /** + * Count questions for a statement + */ + Mono countByStatementIdAndDeletedAtIsNull(Long statementId); + + /** + * Count questions needing review + */ + Mono countByNeedsReviewTrueAndDeletedAtIsNull(); + + /** + * Count questions by type for a statement + */ + Mono countByStatementIdAndQuestionTypeAndDeletedAtIsNull( + Long statementId, + String questionType + ); + + /** + * Check if a question exists by ID and is active + */ + Mono existsByIdAndDeletedAtIsNull(Long id); + + /** + * Check if a question with the same number exists in a statement + */ + Mono existsByStatementIdAndNumberAndDeletedAtIsNull( + Long statementId, + Integer number + ); + + /** + * Delete all questions for a statement (soft delete) + */ + @Query( + "UPDATE questions SET deleted_at = CURRENT_TIMESTAMP WHERE statement_id = :statementId AND deleted_at IS NULL" + ) + Mono softDeleteAllByStatementId(Long statementId); + + /** + * Calculate total max score for a statement + */ + 
@Query( + "SELECT COALESCE(SUM(max_score), 0) FROM questions WHERE statement_id = :statementId AND deleted_at IS NULL" + ) + Mono calculateTotalMaxScore(Long statementId); + + /** + * Find the next order index for a statement + */ + @Query( + "SELECT COALESCE(MAX(order_index), 0) + 1 FROM questions WHERE statement_id = :statementId AND deleted_at IS NULL" + ) + Mono findNextOrderIndex(Long statementId); + + /** + * Find the next question number for a statement + */ + @Query( + "SELECT COALESCE(MAX(number), 0) + 1 FROM questions WHERE statement_id = :statementId AND deleted_at IS NULL" + ) + Mono findNextQuestionNumber(Long statementId); + + /** + * Search questions by text content (case-insensitive partial match) + */ + @Query( + "SELECT * FROM questions WHERE LOWER(text) LIKE LOWER(CONCAT('%', :searchTerm, '%')) AND deleted_at IS NULL ORDER BY statement_id, number" + ) + Flux searchByText(String searchTerm); + + /** + * Find questions with specific score range + */ + @Query( + "SELECT * FROM questions WHERE max_score >= :minScore AND max_score <= :maxScore AND deleted_at IS NULL ORDER BY max_score DESC" + ) + Flux findByScoreRange(Double minScore, Double maxScore); + + /** + * Get average OCR confidence for a statement + */ + @Query( + "SELECT AVG(ocr_confidence) FROM questions WHERE statement_id = :statementId AND ocr_confidence IS NOT NULL AND deleted_at IS NULL" + ) + Mono getAverageOcrConfidence(Long statementId); +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SchoolYearRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SchoolYearRepository.java index 97d9c43..b84e7cc 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SchoolYearRepository.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SchoolYearRepository.java @@ -5,11 +5,24 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; -public interface 
SchoolYearRepository extends ReactiveCrudRepository { - +public interface SchoolYearRepository + extends ReactiveCrudRepository +{ Flux findAllByDeletedAtIsNull(); Flux findAllByDeletedAtIsNotNull(); Mono findByIdAndDeletedAtIsNull(Long id); Mono findByIdAndDeletedAtIsNotNull(Long id); - Mono findByStartYearAndEndYearAndIdNot(Integer startYear, Integer endYear, Long id); -} \ No newline at end of file + Mono findByStartYearAndEndYearAndIdNot( + Integer startYear, + Integer endYear, + Long id + ); + + /** + * Find a school year by start and end year + */ + Mono findByStartYearAndEndYearAndDeletedAtIsNull( + Integer startYear, + Integer endYear + ); +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SimulationAnswerRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SimulationAnswerRepository.java new file mode 100644 index 0000000..ff942fe --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SimulationAnswerRepository.java @@ -0,0 +1,15 @@ +package ao.creativemode.kixi.repository; + +import ao.creativemode.kixi.model.SimulationAnswer; +import org.springframework.data.repository.reactive.ReactiveCrudRepository; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +public interface SimulationAnswerRepository + extends ReactiveCrudRepository +{ + Flux findAllByDeletedAtIsNull(); + Flux findAllByDeletedAtIsNotNull(); + Mono findByIdAndDeletedAtIsNull(Long id); + Mono findByIdAndDeletedAtIsNotNull(Long id); +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SimulationRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SimulationRepository.java new file mode 100644 index 0000000..e846949 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SimulationRepository.java @@ -0,0 +1,16 @@ +package ao.creativemode.kixi.repository; + +import 
org.springframework.data.repository.reactive.ReactiveCrudRepository; +import org.springframework.stereotype.Repository; + +import ao.creativemode.kixi.model.Simulation; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +@Repository +public interface SimulationRepository extends ReactiveCrudRepository { + Flux findByDeletedAtIsNull(); + Flux findByDeletedAtIsNotNull(); + Mono findByIdAndDeletedAtIsNull(Long id); + Mono findByIdAndDeletedAtIsNotNull(Long id); +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/StatementRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/StatementRepository.java index 0f59b82..cc24dcb 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/StatementRepository.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/StatementRepository.java @@ -1,23 +1,170 @@ package ao.creativemode.kixi.repository; import ao.creativemode.kixi.model.Statement; + import org.springframework.data.r2dbc.repository.Query; -import org.springframework.data.repository.reactive.ReactiveCrudRepository; +import org.springframework.data.r2dbc.repository.R2dbcRepository; import org.springframework.stereotype.Repository; + import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +/** + * Repository for Statement entity database operations. + * + * Provides reactive CRUD operations and custom queries for + * managing exam statements in the database. 
+ */ @Repository -public interface StatementRepository extends ReactiveCrudRepository { +public interface StatementRepository extends R2dbcRepository { + + /** + * Find all active (non-deleted) statements + */ + Flux findAllByDeletedAtIsNull(); + + /** + * Find all soft-deleted statements + */ + Flux findAllByDeletedAtIsNotNull(); - Flux findByDeletedAtIsNull(); - Flux findByDeletedAtIsNotNull(); + /** + * Find an active statement by ID + */ Mono findByIdAndDeletedAtIsNull(Long id); - Mono existsByTitleAndDeletedAtIsNull(String title); - Flux findBySchoolYearIdAndDeletedAtIsNull(Long schoolYearId); - Flux findBySubjectIdAndDeletedAtIsNull(Long subjectId); - Flux findByClassIdAndDeletedAtIsNull(Long classId); - @Query("SELECT COUNT(*) FROM statement WHERE delete_at IS NULL") - Mono countActive(); + /** + * Find a deleted statement by ID + */ + Mono findByIdAndDeletedAtIsNotNull(Long id); + + /** + * Find all visible statements + */ + Flux findAllByVisibleTrueAndDeletedAtIsNull(); + + /** + * Find all statements that need review + */ + Flux findAllByNeedsReviewTrueAndDeletedAtIsNull(); + + /** + * Find statements by school year + */ + Flux findAllBySchoolYearIdAndDeletedAtIsNull(Long schoolYearId); + + /** + * Find statements by subject + */ + Flux findAllBySubjectIdAndDeletedAtIsNull(Long subjectId); + + /** + * Find statements by term + */ + Flux findAllByTermIdAndDeletedAtIsNull(Long termId); + + /** + * Find statements by class + */ + Flux findAllByClassIdAndDeletedAtIsNull(Long classId); + + /** + * Find statements by school year and subject + */ + Flux findAllBySchoolYearIdAndSubjectIdAndDeletedAtIsNull(Long schoolYearId, Long subjectId); + + /** + * Find statements by school year, term, and subject + */ + Flux findAllBySchoolYearIdAndTermIdAndSubjectIdAndDeletedAtIsNull( + Long schoolYearId, Long termId, Long subjectId); + + /** + * Find statements created by a specific user + */ + Flux findAllByCreatedByAndDeletedAtIsNull(Long createdBy); + + /** + * Find 
statements by source (manual, ocr, import) + */ + Flux findAllBySourceAndDeletedAtIsNull(String source); + + /** + * Find statements created via OCR + */ + @Query("SELECT * FROM statements WHERE source = 'ocr' AND deleted_at IS NULL ORDER BY created_at DESC") + Flux findAllFromOcr(); + + /** + * Find statements by OCR request ID + */ + Mono findByOcrRequestIdAndDeletedAtIsNull(String ocrRequestId); + + /** + * Find statements with OCR confidence below threshold + */ + @Query("SELECT * FROM statements WHERE ocr_confidence < :threshold AND deleted_at IS NULL ORDER BY ocr_confidence ASC") + Flux findAllWithLowOcrConfidence(Double threshold); + + /** + * Find statements by exam type + */ + Flux findAllByExamTypeAndDeletedAtIsNull(String examType); + + /** + * Find statements by variant + */ + Flux findAllByVariantAndDeletedAtIsNull(String variant); + + /** + * Count active statements + */ + Mono countByDeletedAtIsNull(); + + /** + * Count statements needing review + */ + Mono countByNeedsReviewTrueAndDeletedAtIsNull(); + + /** + * Count statements by source + */ + Mono countBySourceAndDeletedAtIsNull(String source); + + /** + * Check if a statement exists by ID and is active + */ + Mono existsByIdAndDeletedAtIsNull(Long id); + + /** + * Search statements by title (case-insensitive partial match) + */ + @Query("SELECT * FROM statements WHERE LOWER(title) LIKE LOWER(CONCAT('%', :searchTerm, '%')) AND deleted_at IS NULL ORDER BY created_at DESC") + Flux searchByTitle(String searchTerm); + + /** + * Find recent statements with pagination + */ + @Query("SELECT * FROM statements WHERE deleted_at IS NULL ORDER BY created_at DESC LIMIT :limit OFFSET :offset") + Flux findRecentStatements(int limit, int offset); + + /** + * Find statements by multiple filters + */ + @Query(""" + SELECT * FROM statements + WHERE deleted_at IS NULL + AND (:schoolYearId IS NULL OR school_year_id = :schoolYearId) + AND (:termId IS NULL OR term_id = :termId) + AND (:subjectId IS NULL OR subject_id = 
:subjectId) + AND (:classId IS NULL OR class_id = :classId) + AND (:examType IS NULL OR exam_type = :examType) + ORDER BY created_at DESC + """) + Flux findByFilters( + Long schoolYearId, + Long termId, + Long subjectId, + Long classId, + String examType); } diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SubjectRepository.java b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SubjectRepository.java index b839cb2..a03aa7c 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SubjectRepository.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/repository/SubjectRepository.java @@ -1,14 +1,27 @@ -package ao.creativemode.kixi.repository; - -import ao.creativemode.kixi.model.Subject; -import org.springframework.data.repository.reactive.ReactiveCrudRepository; -import reactor.core.publisher.Flux; -import reactor.core.publisher.Mono; - -public interface SubjectRepository extends ReactiveCrudRepository { - - Flux findAllByDeletedAtIsNull(); - Flux findAllByDeletedAtIsNotNull(); - Mono findByIdAndDeletedAtIsNull(Long id); - Mono findByIdAndDeletedAtIsNotNull(Long id); -} +package ao.creativemode.kixi.repository; + +import ao.creativemode.kixi.model.Subject; +import org.springframework.data.repository.reactive.ReactiveCrudRepository; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +public interface SubjectRepository + extends ReactiveCrudRepository +{ + Flux findAllByDeletedAtIsNull(); + Flux findAllByDeletedAtIsNotNull(); + Mono findByIdAndDeletedAtIsNull(Long id); + Mono findByIdAndDeletedAtIsNotNull(Long id); + Mono findByCodeAndDeletedAtIsNull(String code); + Mono findByCodeAndDeletedAtIsNotNull(String code); + + /** + * Find a subject by name (case-insensitive) + */ + Mono findByNameIgnoreCaseAndDeletedAtIsNull(String name); + + /** + * Find subjects by name containing (case-insensitive) + */ + Flux 
findByNameContainingIgnoreCaseAndDeletedAtIsNull(String name); +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/security/JwtAuthenticationFilter.java b/services/backend-api/src/main/java/ao/creativemode/kixi/security/JwtAuthenticationFilter.java new file mode 100644 index 0000000..b01cdaa --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/security/JwtAuthenticationFilter.java @@ -0,0 +1,55 @@ +package ao.creativemode.kixi.security; + +import ao.creativemode.kixi.service.JwtService; +import io.jsonwebtoken.Claims; +import io.jsonwebtoken.JwtException; +import org.springframework.http.HttpHeaders; +import org.springframework.security.authentication.UsernamePasswordAuthenticationToken; +import org.springframework.security.core.authority.SimpleGrantedAuthority; +import org.springframework.security.core.context.ReactiveSecurityContextHolder; +import org.springframework.stereotype.Component; +import org.springframework.web.server.ServerWebExchange; +import org.springframework.web.server.WebFilter; +import org.springframework.web.server.WebFilterChain; +import reactor.core.publisher.Mono; + +import java.util.List; +import java.util.stream.Collectors; + +@Component +public class JwtAuthenticationFilter implements WebFilter { + + private static final String BEARER_PREFIX = "Bearer "; + + private final JwtService jwtService; + + public JwtAuthenticationFilter(JwtService jwtService) { + this.jwtService = jwtService; + } + + @Override + public Mono filter(ServerWebExchange exchange, WebFilterChain chain) { + String authHeader = exchange.getRequest().getHeaders().getFirst(HttpHeaders.AUTHORIZATION); + if (authHeader == null || !authHeader.startsWith(BEARER_PREFIX)) { + return chain.filter(exchange); + } + String token = authHeader.substring(BEARER_PREFIX.length()).trim(); + if (token.isEmpty()) { + return chain.filter(exchange); + } + try { + Claims claims = jwtService.parseToken(token); + Long accountId = 
jwtService.getAccountId(claims); + List roles = jwtService.getRoles(claims); + List authorities = roles.stream() + .map(r -> new SimpleGrantedAuthority("ROLE_" + r)) + .collect(Collectors.toList()); + UsernamePasswordAuthenticationToken authentication = + new UsernamePasswordAuthenticationToken(accountId.toString(), null, authorities); + return chain.filter(exchange) + .contextWrite(ReactiveSecurityContextHolder.withAuthentication(authentication)); + } catch (JwtException e) { + return chain.filter(exchange); + } + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/AccountService.java b/services/backend-api/src/main/java/ao/creativemode/kixi/service/AccountService.java index 4cfe05f..8849e4e 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/service/AccountService.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/service/AccountService.java @@ -6,7 +6,7 @@ import ao.creativemode.kixi.model.Account; import ao.creativemode.kixi.repository.AccountRepository; import org.springframework.dao.DataIntegrityViolationException; -import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder; +import org.springframework.security.crypto.password.PasswordEncoder; import org.springframework.stereotype.Service; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; @@ -17,11 +17,11 @@ public class AccountService { private final AccountRepository repository; - private final BCryptPasswordEncoder passwordEncoder; + private final PasswordEncoder passwordEncoder; - public AccountService(AccountRepository repository) { + public AccountService(AccountRepository repository, PasswordEncoder passwordEncoder) { this.repository = repository; - this.passwordEncoder = new BCryptPasswordEncoder(); + this.passwordEncoder = passwordEncoder; } public Flux findAllActive() { diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/AuthService.java 
b/services/backend-api/src/main/java/ao/creativemode/kixi/service/AuthService.java new file mode 100644 index 0000000..fead86c --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/service/AuthService.java @@ -0,0 +1,146 @@ +package ao.creativemode.kixi.service; + +import ao.creativemode.kixi.common.exception.ApiException; +import ao.creativemode.kixi.dto.auth.LoginResponse; +import ao.creativemode.kixi.model.Account; +import ao.creativemode.kixi.model.AccountRole; +import ao.creativemode.kixi.model.Role; +import ao.creativemode.kixi.model.User; +import ao.creativemode.kixi.repository.AccountRepository; +import ao.creativemode.kixi.repository.AccountRoleRepository; +import ao.creativemode.kixi.repository.RoleRepository; +import ao.creativemode.kixi.repository.UserRepository; +import ao.creativemode.kixi.service.GoogleOAuth2Client.GoogleUserInfo; +import org.springframework.security.crypto.password.PasswordEncoder; +import org.springframework.stereotype.Service; +import reactor.core.publisher.Mono; + +import java.time.LocalDateTime; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import java.util.stream.Collectors; + +@Service +public class AuthService { + + private static final String DEFAULT_ROLE_NAME = "USER"; + + private final AccountRepository accountRepository; + private final AccountRoleRepository accountRoleRepository; + private final RoleRepository roleRepository; + private final UserRepository userRepository; + private final JwtService jwtService; + private final PasswordEncoder passwordEncoder; + private final GoogleOAuth2Client googleOAuth2Client; + + public AuthService(AccountRepository accountRepository, + AccountRoleRepository accountRoleRepository, + RoleRepository roleRepository, + UserRepository userRepository, + JwtService jwtService, + PasswordEncoder passwordEncoder, + GoogleOAuth2Client googleOAuth2Client) { + this.accountRepository = accountRepository; + this.accountRoleRepository = 
accountRoleRepository; + this.roleRepository = roleRepository; + this.userRepository = userRepository; + this.jwtService = jwtService; + this.passwordEncoder = passwordEncoder; + this.googleOAuth2Client = googleOAuth2Client; + } + + public Mono login(String usernameOrEmail, String password) { + return findAccountByUsernameOrEmail(usernameOrEmail.trim()) + .switchIfEmpty(Mono.error(ApiException.badRequest("Invalid username or password"))) + .filter(Account::getActive) + .switchIfEmpty(Mono.error(ApiException.badRequest("Account is inactive"))) + .filter(account -> passwordEncoder.matches(password, account.getPasswordHash())) + .switchIfEmpty(Mono.error(ApiException.badRequest("Invalid username or password"))) + .flatMap(account -> recordLogin(account) + .flatMap(updated -> loadRoleNames(updated.getId()) + .map(roles -> buildLoginResponse(updated.getId(), roles)))); + } + + private Mono findAccountByUsernameOrEmail(String input) { + return accountRepository.findByUsernameAndDeletedAtIsNull(input) + .switchIfEmpty(accountRepository.findByEmailAndDeletedAtIsNull(input)); + } + + private Mono recordLogin(Account account) { + account.setLastLogin(java.time.LocalDateTime.now()); + return accountRepository.save(account); + } + + private Mono> loadRoleNames(Long accountId) { + return accountRoleRepository.findByAccountIdAndDeletedAtIsNull(accountId) + .flatMap(ar -> roleRepository.findById(ar.getRoleId())) + .filter(role -> role.getDeletedAt() == null) + .map(Role::getName) + .collect(Collectors.toList()); + } + + private LoginResponse buildLoginResponse(Long accountId, List roles) { + String token = jwtService.generateToken(accountId, roles); + Instant expiresAt = Instant.now().plusMillis(jwtService.getExpirationMs()); + return new LoginResponse(token, LoginResponse.TOKEN_TYPE, expiresAt, accountId, roles); + } + + /** + * Login via Google OAuth2: troca o code por token, obtém userinfo, encontra ou cria Account/User, atribui role padrão, emite JWT. 
+ */ + public Mono loginWithGoogle(String code) { + return googleOAuth2Client.exchangeCodeForAccessToken(code) + .flatMap(googleOAuth2Client::getUserInfo) + .flatMap(this::findOrCreateAccountFromGoogle) + .flatMap(account -> recordLogin(account) + .flatMap(updated -> loadRoleNames(updated.getId()) + .map(roles -> buildLoginResponse(updated.getId(), roles)))); + } + + private Mono findOrCreateAccountFromGoogle(GoogleUserInfo info) { + if (info.email() == null || info.email().isBlank()) { + return Mono.error(ApiException.badRequest("Google did not provide email")); + } + String email = info.email().trim().toLowerCase(); + return accountRepository.findByEmailAndDeletedAtIsNull(email) + .switchIfEmpty(createAccountAndUserFromGoogle(email, info)); + } + + private Mono createAccountAndUserFromGoogle(String email, GoogleUserInfo info) { + String username = email.split("@")[0]; + String passwordHash = passwordEncoder.encode(UUID.randomUUID().toString()); + Account account = new Account(); + account.setUsername(username); + account.setEmail(email); + account.setPasswordHash(passwordHash); + account.setEmailVerified(true); + account.setActive(true); + account.setDeletedAt(null); + + return accountRepository.save(account) + .flatMap(savedAccount -> Mono.when( + roleRepository.findByNameAndDeletedAtIsNull(DEFAULT_ROLE_NAME) + .flatMap(role -> { + AccountRole ar = new AccountRole(savedAccount.getId(), role.getId()); + return accountRoleRepository.save(ar); + }), + createUserFromGoogle(savedAccount.getId(), info) + ).thenReturn(savedAccount)); + } + + private Mono createUserFromGoogle(Long accountId, GoogleUserInfo info) { + String[] names = info.name() != null && !info.name().isBlank() + ? info.name().trim().split("\\s+", 2) + : new String[]{"", ""}; + String firstName = names[0]; + String lastName = names.length > 1 ? 
names[1] : ""; + User user = new User(); + user.setAccountId(accountId); + user.setFirstName(firstName); + user.setLastName(lastName); + user.setPhoto(info.picture()); + user.setDeletedAt(null); + return userRepository.save(user); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/ChatService.java b/services/backend-api/src/main/java/ao/creativemode/kixi/service/ChatService.java new file mode 100644 index 0000000..eb2d679 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/service/ChatService.java @@ -0,0 +1,81 @@ +package ao.creativemode.kixi.service; + +import ao.creativemode.kixi.dto.ChatMessageDto; +import ao.creativemode.kixi.dto.ChatRequestDto; +import ao.creativemode.kixi.dto.ChatResponseDto; +import lombok.RequiredArgsConstructor; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; +import org.springframework.web.reactive.function.client.WebClient; +import reactor.core.publisher.Mono; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@Service +@RequiredArgsConstructor +public class ChatService { + + private final WebClient groqWebClient; + + @Value("${groq.model:llama-3.3-70b-versatile}") + private String model; + + public Mono chat(ChatRequestDto request) { + List> messages = new ArrayList<>(); + + messages.add(Map.of( + "role", "system", + "content", "Você é um assistente educacional chamado Kixi. Ajude os usuários com questões sobre provas, exames e conteúdos educacionais." 
+ )); + + if (request.getHistory() != null) { + for (ChatMessageDto msg : request.getHistory()) { + messages.add(Map.of( + "role", msg.getRole(), + "content", msg.getContent() + )); + } + } + + messages.add(Map.of( + "role", "user", + "content", request.getMessage() + )); + + Map body = new HashMap<>(); + body.put("model", model); + body.put("messages", messages); + body.put("temperature", 0.7); + body.put("max_tokens", 1024); + + return groqWebClient.post() + .uri("/chat/completions") + .bodyValue(body) + .retrieve() + .bodyToMono(Map.class) + .map(this::parseResponse); + } + + @SuppressWarnings("unchecked") + private ChatResponseDto parseResponse(Map response) { + List> choices = (List>) response.get("choices"); + Map usage = (Map) response.get("usage"); + + String content = ""; + if (choices != null && !choices.isEmpty()) { + Map message = (Map) choices.get(0).get("message"); + content = (String) message.get("content"); + } + + Integer totalTokens = usage != null ? (Integer) usage.get("total_tokens") : null; + + return ChatResponseDto.builder() + .message(content) + .model((String) response.get("model")) + .tokensUsed(totalTokens) + .build(); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/GoogleOAuth2Client.java b/services/backend-api/src/main/java/ao/creativemode/kixi/service/GoogleOAuth2Client.java new file mode 100644 index 0000000..f1050c4 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/service/GoogleOAuth2Client.java @@ -0,0 +1,62 @@ +package ao.creativemode.kixi.service; + +import ao.creativemode.kixi.config.GoogleOAuth2Properties; +import org.springframework.stereotype.Component; +import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.MultiValueMap; +import org.springframework.web.reactive.function.BodyInserters; +import org.springframework.web.reactive.function.client.WebClient; +import reactor.core.publisher.Mono; + +import java.util.Map; + +/** + * Cliente 
para trocar code por token e obter userinfo do Google OAuth2. + */ +@Component +public class GoogleOAuth2Client { + + private final GoogleOAuth2Properties properties; + private final WebClient webClient = WebClient.builder().build(); + + public GoogleOAuth2Client(GoogleOAuth2Properties properties) { + this.properties = properties; + } + + public Mono exchangeCodeForAccessToken(String code) { + if (properties.getClientId() == null || properties.getClientSecret() == null) { + return Mono.error(new IllegalStateException("Google OAuth2 not configured (clientId/clientSecret missing)")); + } + MultiValueMap form = new LinkedMultiValueMap<>(); + form.add("code", code); + form.add("client_id", properties.getClientId()); + form.add("client_secret", properties.getClientSecret()); + form.add("redirect_uri", properties.getRedirectUri()); + form.add("grant_type", "authorization_code"); + + return webClient.post() + .uri(properties.getTokenUri()) + .contentType(org.springframework.http.MediaType.APPLICATION_FORM_URLENCODED) + .body(BodyInserters.fromFormData(form)) + .retrieve() + .bodyToMono(Map.class) + .map(m -> (String) m.get("access_token")) + .filter(t -> t != null && !t.isBlank()) + .switchIfEmpty(Mono.error(new IllegalArgumentException("Invalid or expired code"))); + } + + public Mono getUserInfo(String accessToken) { + return webClient.get() + .uri(properties.getUserInfoUri()) + .headers(h -> h.setBearerAuth(accessToken)) + .retrieve() + .bodyToMono(Map.class) + .map(m -> new GoogleUserInfo( + (String) m.get("email"), + (String) m.get("name"), + (String) m.get("picture") + )); + } + + public record GoogleUserInfo(String email, String name, String picture) {} +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/JwtService.java b/services/backend-api/src/main/java/ao/creativemode/kixi/service/JwtService.java new file mode 100644 index 0000000..41d0e0b --- /dev/null +++ 
b/services/backend-api/src/main/java/ao/creativemode/kixi/service/JwtService.java @@ -0,0 +1,63 @@ +package ao.creativemode.kixi.service; + +import ao.creativemode.kixi.config.JwtProperties; +import io.jsonwebtoken.Claims; +import io.jsonwebtoken.Jwts; +import io.jsonwebtoken.security.Keys; +import org.springframework.stereotype.Service; + +import javax.crypto.SecretKey; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.util.Date; +import java.util.List; +import java.util.stream.Collectors; + +@Service +public class JwtService { + + private final JwtProperties properties; + private final SecretKey key; + + public JwtService(JwtProperties properties) { + this.properties = properties; + this.key = Keys.hmacShaKeyFor(properties.getSecret().getBytes(StandardCharsets.UTF_8)); + } + + public String generateToken(Long accountId, List roles) { + Instant now = Instant.now(); + Instant expiry = now.plusMillis(properties.getExpirationMs()); + return Jwts.builder() + .subject(accountId.toString()) + .claim("roles", roles) + .issuedAt(Date.from(now)) + .expiration(Date.from(expiry)) + .signWith(key) + .compact(); + } + + public Claims parseToken(String token) { + return Jwts.parser() + .verifyWith(key) + .build() + .parseSignedClaims(token) + .getPayload(); + } + + @SuppressWarnings("unchecked") + public List getRoles(Claims claims) { + List list = claims.get("roles", List.class); + if (list == null) return List.of(); + return list.stream() + .map(Object::toString) + .collect(Collectors.toList()); + } + + public Long getAccountId(Claims claims) { + return Long.parseLong(claims.getSubject()); + } + + public long getExpirationMs() { + return properties.getExpirationMs(); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/OcrPersistenceService.java b/services/backend-api/src/main/java/ao/creativemode/kixi/service/OcrPersistenceService.java new file mode 100644 index 0000000..75d73e8 --- /dev/null +++ 
package ao.creativemode.kixi.service;

import ao.creativemode.kixi.client.OcrServiceClient;
import ao.creativemode.kixi.common.exception.ApiException;
import ao.creativemode.kixi.dto.ocr.OcrResponse;
import ao.creativemode.kixi.dto.ocr.OcrResponse.ExtractedOption;
import ao.creativemode.kixi.dto.ocr.OcrResponse.ExtractedQuestion;
import ao.creativemode.kixi.dto.ocr.OcrResponse.OcrMetadata;
import ao.creativemode.kixi.model.Class;
import ao.creativemode.kixi.model.Course;
import ao.creativemode.kixi.model.Question;
import ao.creativemode.kixi.model.QuestionOption;
import ao.creativemode.kixi.model.SchoolYear;
import ao.creativemode.kixi.model.Statement;
import ao.creativemode.kixi.model.Subject;
import ao.creativemode.kixi.repository.ClassRepository;
import ao.creativemode.kixi.repository.CourseRepository;
import ao.creativemode.kixi.repository.QuestionOptionRepository;
import ao.creativemode.kixi.repository.QuestionRepository;
import ao.creativemode.kixi.repository.SchoolYearRepository;
import ao.creativemode.kixi.repository.StatementRepository;
import ao.creativemode.kixi.repository.SubjectRepository;
import java.time.LocalDateTime;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.codec.multipart.FilePart;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

/**
 * Service for persisting OCR extraction results.
 *
 * Handles the complete workflow of:
 * - Extracting data via OCR
 * - Looking up or creating related entities (SchoolYear, Course, Subject, Class)
 * - Creating the Statement with its Questions and Options
 *
 * Implements the uniqueness constraints:
 * - school_years: unique by (start_year, end_year)
 * - courses: unique by name (normalized)
 * - subjects: unique by name
 * - classes: unique by (grade, course_id, school_year_id)
 * - statement: unique by (title + variant + school_year_id + subject_id + class_id)
 *
 * NOTE(review): the find-or-create lookups are not atomic — two concurrent
 * uploads may race on the unique constraints; confirm that the resulting DB
 * constraint violation is an acceptable tie-breaker.
 */
@Service
public class OcrPersistenceService {

    private static final Logger log = LoggerFactory.getLogger(
        OcrPersistenceService.class
    );

    // Questions below this OCR confidence are flagged for manual review.
    private static final double LOW_CONFIDENCE_THRESHOLD = 0.8;
    // Reserved lower bound; currently unused by this service.
    private static final double MIN_CONFIDENCE_THRESHOLD = 0.5;

    private final OcrServiceClient ocrServiceClient;
    private final StatementRepository statementRepository;
    private final QuestionRepository questionRepository;
    private final QuestionOptionRepository optionRepository;
    private final SchoolYearRepository schoolYearRepository;
    private final CourseRepository courseRepository;
    private final SubjectRepository subjectRepository;
    private final ClassRepository classRepository;

    public OcrPersistenceService(
        OcrServiceClient ocrServiceClient,
        StatementRepository statementRepository,
        QuestionRepository questionRepository,
        QuestionOptionRepository optionRepository,
        SchoolYearRepository schoolYearRepository,
        CourseRepository courseRepository,
        SubjectRepository subjectRepository,
        ClassRepository classRepository
    ) {
        this.ocrServiceClient = ocrServiceClient;
        this.statementRepository = statementRepository;
        this.questionRepository = questionRepository;
        this.optionRepository = optionRepository;
        this.schoolYearRepository = schoolYearRepository;
        this.courseRepository = courseRepository;
        this.subjectRepository = subjectRepository;
        this.classRepository = classRepository;
    }

    // =========================================================================
    // Main OCR Processing Methods
    // =========================================================================

    /**
     * Process uploaded files via OCR and persist the results.
     *
     * @param files     list of uploaded image files
     * @param createdBy id of the user creating the statement
     * @return the created statement with all related data
     */
    @Transactional
    public Mono<StatementWithRelations> processAndPersist(
        List<FilePart> files,
        Long createdBy
    ) {
        log.info(
            "Processing OCR and persisting: {} file(s), createdBy={}",
            files.size(),
            createdBy
        );

        return ocrServiceClient
            .extractText(files)
            .flatMap(ocrResponse -> {
                if (ocrResponse.isError()) {
                    log.error(
                        "OCR extraction failed: {}",
                        ocrResponse.errorMessage()
                    );
                    return Mono.error(
                        ApiException.badRequest(
                            "OCR extraction failed: " +
                            ocrResponse.errorMessage()
                        )
                    );
                }

                log.info(
                    "OCR extraction successful: requestId={}, confidence={}, questions={}",
                    ocrResponse.requestId(),
                    ocrResponse.overallConfidence(),
                    ocrResponse.questions() != null
                        ? ocrResponse.questions().size()
                        : 0
                );

                return persistOcrResponse(ocrResponse, createdBy);
            })
            .doOnSuccess(result ->
                log.info(
                    "Statement created from OCR: statementId={}",
                    result.statement().getId()
                )
            )
            .doOnError(error ->
                log.error("Failed to process and persist OCR", error)
            );
    }

    /**
     * Persist an OCR response to the database.
     *
     * @param ocrResponse the OCR response containing extracted data
     * @param createdBy   id of the user creating the statement
     * @return the created statement with all related data
     */
    @Transactional
    public Mono<StatementWithRelations> persistOcrResponse(
        OcrResponse ocrResponse,
        Long createdBy
    ) {
        OcrMetadata metadata = ocrResponse.metadata();
        // Guard: a response without metadata would NPE in every lookup below.
        if (metadata == null) {
            return Mono.error(
                ApiException.badRequest(
                    "OCR response contains no metadata"
                )
            );
        }

        // Step 1-3: resolve (or lazily create) the lookup entities.
        Mono<SchoolYear> schoolYearMono = findOrCreateSchoolYear(metadata);
        Mono<Course> courseMono = findOrCreateCourse(metadata);
        Mono<Subject> subjectMono = findOrCreateSubject(metadata);

        // Combine the lookups, then create Class, Statement, Questions, Options.
        return Mono.zip(schoolYearMono, courseMono, subjectMono).flatMap(
            tuple -> {
                SchoolYear schoolYear = tuple.getT1();
                Course course = tuple.getT2();
                Subject subject = tuple.getT3();

                // Step 4: Find or create Class
                return findOrCreateClass(metadata, course, schoolYear).flatMap(
                    classEntity ->
                        // Step 5: Create Statement
                        createStatement(
                            ocrResponse,
                            metadata,
                            createdBy,
                            schoolYear,
                            course,
                            subject,
                            classEntity
                        ).flatMap(statement ->
                            // Step 6: Create Questions (and their options)
                            createQuestions(
                                statement.getId(),
                                ocrResponse.questions()
                            )
                                .collectList()
                                .flatMap(questions -> {
                                    // Step 7: reload all persisted options
                                    List<Long> questionIds = questions
                                        .stream()
                                        .map(Question::getId)
                                        .toList();

                                    return optionRepository
                                        .findAllByQuestionIds(questionIds)
                                        .collectList()
                                        .map(options ->
                                            new StatementWithRelations(
                                                statement,
                                                schoolYear,
                                                course,
                                                subject,
                                                classEntity,
                                                questions,
                                                options,
                                                ocrResponse.imagesToUpload()
                                            )
                                        );
                                })
                        )
                );
            }
        );
    }

    // =========================================================================
    // Entity Lookup/Create Methods
    // =========================================================================

    /**
     * Find or create a SchoolYear based on OCR metadata.
     * Falls back to the current academic year when extraction failed.
     */
    private Mono<SchoolYear> findOrCreateSchoolYear(OcrMetadata metadata) {
        Integer startYear = metadata.getSchoolYearStartValue();
        Integer endYear = metadata.getSchoolYearEndValue();

        if (startYear == null || endYear == null) {
            // Single clock read so year/month cannot disagree around midnight.
            LocalDateTime now = LocalDateTime.now();
            // Academic year in Angola typically starts in September.
            if (now.getMonthValue() >= 9) {
                startYear = now.getYear();
                endYear = startYear + 1;
            } else {
                endYear = now.getYear();
                startYear = endYear - 1;
            }
            log.warn(
                "School year not extracted, using default: {}/{}",
                startYear,
                endYear
            );
        }

        final Integer finalStartYear = startYear;
        final Integer finalEndYear = endYear;

        return schoolYearRepository
            .findByStartYearAndEndYearAndDeletedAtIsNull(startYear, endYear)
            .switchIfEmpty(
                Mono.defer(() -> {
                    log.info(
                        "Creating new school year: {}/{}",
                        finalStartYear,
                        finalEndYear
                    );
                    SchoolYear newSchoolYear = new SchoolYear();
                    newSchoolYear.setStartYear(finalStartYear);
                    newSchoolYear.setEndYear(finalEndYear);
                    return schoolYearRepository.save(newSchoolYear);
                })
            );
    }

    /**
     * Find or create a Course based on OCR metadata.
     * Defaults to "TODOS" (general exams) when no course was extracted.
     */
    private Mono<Course> findOrCreateCourse(OcrMetadata metadata) {
        String courseName = metadata.getCourseNameValue();

        if (courseName == null || courseName.isBlank()) {
            courseName = "TODOS"; // Default course for general exams
        }

        final String finalCourseName = normalizeCourseName(courseName);

        return courseRepository
            .findByNameIgnoreCaseAndDeletedAtIsNull(finalCourseName)
            .switchIfEmpty(
                Mono.defer(() -> {
                    log.info("Creating new course: {}", finalCourseName);
                    Course newCourse = new Course();
                    newCourse.setName(finalCourseName);
                    newCourse.setCode(generateCourseCode(finalCourseName));
                    return courseRepository.save(newCourse);
                })
            );
    }

    /**
     * Find or create a Subject based on OCR metadata.
     * The subject name is mandatory — fails with 400 when missing.
     */
    private Mono<Subject> findOrCreateSubject(OcrMetadata metadata) {
        String subjectName = metadata.getSubjectNameValue();

        if (subjectName == null || subjectName.isBlank()) {
            return Mono.error(
                ApiException.badRequest(
                    "Subject name is required but not extracted from OCR"
                )
            );
        }

        final String finalSubjectName = normalizeSubjectName(subjectName);

        return subjectRepository
            .findByNameIgnoreCaseAndDeletedAtIsNull(finalSubjectName)
            .switchIfEmpty(
                Mono.defer(() -> {
                    log.info("Creating new subject: {}", finalSubjectName);
                    Subject newSubject = new Subject();
                    newSubject.setName(finalSubjectName);
                    newSubject.setCode(generateSubjectCode(finalSubjectName));
                    newSubject.setShortName(
                        generateShortName(finalSubjectName)
                    );
                    return subjectRepository.save(newSubject);
                })
            );
    }

    /**
     * Find or create a Class based on OCR metadata.
     * Defaults to grade 12 when the grade could not be extracted or parsed.
     */
    private Mono<Class> findOrCreateClass(
        OcrMetadata metadata,
        Course course,
        SchoolYear schoolYear
    ) {
        String gradeStr = metadata.getClassGradeValue();

        if (gradeStr == null || gradeStr.isBlank()) {
            gradeStr = "12"; // Default to 12th grade for exams
            log.warn("Class grade not extracted, using default: {}", gradeStr);
        }

        // Parse grade, stripping ordinal markers such as "12ª".
        Integer grade;
        try {
            grade = Integer.parseInt(gradeStr.replaceAll("[^0-9]", ""));
        } catch (NumberFormatException e) {
            grade = 12; // Default to 12th grade
            log.warn("Could not parse grade '{}', using default: 12", gradeStr);
        }

        final Integer finalGrade = grade;
        final String gradeCode = String.valueOf(grade);

        // Prefer the (grade, course, schoolYear) lookup when a course exists.
        if (course != null && course.getId() != null) {
            return classRepository
                .findByGradeAndCourseIdAndSchoolYearIdAndDeletedAtIsNull(
                    grade,
                    course.getId(),
                    schoolYear.getId()
                )
                .switchIfEmpty(
                    Mono.defer(() -> {
                        log.info(
                            "Creating new class: grade={}, courseId={}, schoolYearId={}",
                            finalGrade,
                            course.getId(),
                            schoolYear.getId()
                        );
                        Class newClass = new Class();
                        newClass.setGrade(finalGrade);
                        newClass.setCourseId(course.getId());
                        newClass.setSchoolYearId(schoolYear.getId());
                        newClass.setCode(
                            generateClassCode(gradeCode, course.getCode())
                        );
                        return classRepository.save(newClass);
                    })
                );
        }

        // Fallback: lookup without a course.
        return classRepository
            .findByGradeAndSchoolYearIdAndDeletedAtIsNull(
                grade,
                schoolYear.getId()
            )
            .switchIfEmpty(
                Mono.defer(() -> {
                    log.info(
                        "Creating new class: grade={}, schoolYearId={}",
                        finalGrade,
                        schoolYear.getId()
                    );
                    Class newClass = new Class();
                    newClass.setGrade(finalGrade);
                    newClass.setSchoolYearId(schoolYear.getId());
                    newClass.setCode(generateClassCode(gradeCode, null));
                    return classRepository.save(newClass);
                })
            );
    }

    // =========================================================================
    // Statement and Questions Creation
    // =========================================================================

    /**
     * Create a Statement from OCR data. New statements are created hidden
     * (visible = false) so they can be reviewed before publishing.
     */
    private Mono<Statement> createStatement(
        OcrResponse ocrResponse,
        OcrMetadata metadata,
        Long createdBy,
        SchoolYear schoolYear,
        Course course,
        Subject subject,
        Class classEntity
    ) {
        Statement statement = new Statement();

        // Title — fall back to a synthesized one when OCR gave none.
        if (metadata.title() != null && metadata.title().value() != null) {
            statement.setTitle(metadata.title().value());
        } else {
            statement.setTitle(
                buildDefaultTitle(metadata, subject, classEntity, schoolYear)
            );
        }

        // Exam type
        if (
            metadata.examType() != null && metadata.examType().value() != null
        ) {
            statement.setExamType(metadata.examType().value());
        } else {
            statement.setExamType("Prova de Exame");
        }

        // Duration
        if (
            metadata.durationMinutes() != null &&
            metadata.durationMinutes().value() != null
        ) {
            statement.setDurationMinutes(metadata.durationMinutes().value());
        }

        // Variant
        if (metadata.variant() != null && metadata.variant().value() != null) {
            statement.setVariant(metadata.variant().value());
        }

        // Instructions
        if (
            metadata.instructions() != null &&
            metadata.instructions().value() != null
        ) {
            statement.setInstructions(metadata.instructions().value());
        }

        // Total max score — use the extracted value, otherwise sum the
        // per-question "cotação" values (only set when something was found).
        if (metadata.getTotalMaxScoreValue() != null) {
            statement.setTotalMaxScore(metadata.getTotalMaxScoreValue());
        } else {
            double totalScore =
                ocrResponse.questions() != null
                    ? ocrResponse
                        .questions()
                        .stream()
                        .filter(q -> q.getCotacaoValue() != null)
                        .mapToDouble(ExtractedQuestion::getCotacaoValue)
                        .sum()
                    : 0.0;
            if (totalScore > 0) {
                statement.setTotalMaxScore(totalScore);
            }
        }

        // Foreign keys
        statement.setSchoolYearId(schoolYear.getId());
        statement.setCourseId(course != null ? course.getId() : null);
        statement.setSubjectId(subject.getId());
        statement.setClassId(classEntity.getId());
        statement.setCreatedBy(createdBy);

        // OCR provenance
        statement.setOcrMetadata(
            ocrResponse.requestId(),
            ocrResponse.overallConfidence(),
            ocrResponse.needsReview()
        );
        statement.setSource("ocr");
        statement.setVisible(false); // Require manual review before publishing

        return statementRepository.save(statement);
    }

    /**
     * Create questions from OCR extracted data.
     * Uses concatMap so questions are persisted in document order.
     */
    private Flux<Question> createQuestions(
        Long statementId,
        List<ExtractedQuestion> extractedQuestions
    ) {
        if (extractedQuestions == null || extractedQuestions.isEmpty()) {
            return Flux.empty();
        }

        return Flux.fromIterable(extractedQuestions)
            .index()
            .concatMap(tuple -> {
                long index = tuple.getT1();
                ExtractedQuestion extracted = tuple.getT2();

                Question question = mapExtractedToQuestion(
                    statementId,
                    extracted,
                    (int) index
                );

                return questionRepository
                    .save(question)
                    .flatMap(savedQuestion -> {
                        // Create options for multiple-choice questions.
                        if (
                            extracted.options() != null &&
                            !extracted.options().isEmpty()
                        ) {
                            return createOptions(
                                savedQuestion.getId(),
                                extracted.options()
                            ).then(Mono.just(savedQuestion));
                        }
                        return Mono.just(savedQuestion);
                    });
            });
    }

    /**
     * Map an extracted question to a Question entity.
     * The question number may be null or non-numeric ("2a"); in either case
     * the positional fallback (orderIndex + 1) is used.
     */
    private Question mapExtractedToQuestion(
        Long statementId,
        ExtractedQuestion extracted,
        int orderIndex
    ) {
        Question question = new Question();
        question.setStatementId(statementId);

        Integer parsedNumber = parseQuestionNumber(extracted.number());
        question.setNumber(
            parsedNumber != null ? parsedNumber : orderIndex + 1
        );

        question.setOrderIndex(orderIndex);
        question.setText(extracted.getTextValue());
        question.setQuestionType(mapQuestionType(extracted.getTypeValue()));

        // Cotação (score)
        if (extracted.getCotacaoValue() != null) {
            question.setMaxScore(extracted.getCotacaoValue());
        }

        // OCR metadata
        question.setOcrConfidence(extracted.confidence());
        question.setPageIndex(extracted.pageIndex());

        // Mark for review if low confidence
        question.setNeedsReview(
            extracted.confidence() != null &&
            extracted.confidence() < LOW_CONFIDENCE_THRESHOLD
        );

        return question;
    }

    /**
     * Extract the numeric part of a question number such as "3" or "2a)".
     * Returns null when the input is null or contains no digits — the
     * original code NPE'd on a null number before the catch could help.
     */
    private Integer parseQuestionNumber(String raw) {
        if (raw == null) {
            return null;
        }
        String digits = raw.replaceAll("[^0-9]", "");
        if (digits.isEmpty()) {
            return null;
        }
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            return null; // e.g. digit run too long for an int
        }
    }

    /**
     * Map question type from Portuguese to database format.
     * Unknown types pass through unchanged.
     */
    private String mapQuestionType(String type) {
        if (type == null) {
            return "unknown";
        }
        return switch (type.toLowerCase()) {
            case "dissertativa" -> "development";
            case "multipla_escolha" -> "multiple_choice";
            default -> type;
        };
    }

    /**
     * Create options for a multiple-choice question, preserving order.
     */
    private Flux<QuestionOption> createOptions(
        Long questionId,
        List<ExtractedOption> extractedOptions
    ) {
        return Flux.fromIterable(extractedOptions)
            .index()
            .concatMap(tuple -> {
                int index = tuple.getT1().intValue();
                ExtractedOption extracted = tuple.getT2();

                QuestionOption option = new QuestionOption();
                option.setQuestionId(questionId);
                option.setOptionLabel(extracted.optionLabel());
                option.setOptionText(extracted.optionText());
                option.setOrderIndex(index);
                option.setOcrConfidence(extracted.confidence());
                option.setIsCorrect(false); // OCR cannot determine correct answer

                return optionRepository.save(option);
            });
    }

    // =========================================================================
    // Utility Methods
    // =========================================================================

    /**
     * Normalize course name for consistent storage (trimmed, upper-cased).
     */
    private String normalizeCourseName(String name) {
        if (name == null) return null;
        return name.trim().toUpperCase();
    }

    /**
     * Normalize subject name with proper Portuguese capitalization.
     */
    private String normalizeSubjectName(String name) {
        if (name == null) return null;

        String normalized = name.trim();
        return switch (normalized.toLowerCase()) {
            case "matematica", "matemática" -> "Matemática";
            case "fisica", "física" -> "Física";
            case "quimica", "química" -> "Química";
            case "biologia" -> "Biologia";
            case "portugues", "português" -> "Português";
            case "ingles", "inglês" -> "Inglês";
            case "frances", "francês" -> "Francês";
            case "historia", "história" -> "História";
            case "geografia" -> "Geografia";
            case "filosofia" -> "Filosofia";
            default -> toTitleCase(normalized);
        };
    }

    /**
     * Convert a string to title case (first letter of each word upper-cased).
     */
    private String toTitleCase(String text) {
        if (text == null || text.isEmpty()) return text;
        String[] words = text.split("\\s+");
        StringBuilder result = new StringBuilder();
        for (String word : words) {
            if (!result.isEmpty()) result.append(" ");
            if (!word.isEmpty()) {
                result.append(Character.toUpperCase(word.charAt(0)));
                if (word.length() > 1) {
                    result.append(word.substring(1).toLowerCase());
                }
            }
        }
        return result.toString();
    }

    /**
     * Generate a course code from the course name (first three letters).
     */
    private String generateCourseCode(String name) {
        if (name == null) return "GEN";
        return name.length() > 3
            ? name.substring(0, 3).toUpperCase()
            : name.toUpperCase();
    }

    /**
     * Generate a subject code by stripping vowels/accents and truncating.
     */
    private String generateSubjectCode(String name) {
        if (name == null) return "GEN";
        String code = name
            .replaceAll("[aeiouáàâãéèêíïóôõöúç\\s]", "")
            .toUpperCase();
        return code.length() > 4
            ? code.substring(0, 4)
            : (code.isEmpty()
                ? name.substring(0, Math.min(3, name.length())).toUpperCase()
                : code);
    }

    /**
     * Generate a short display name for a subject (max 5 chars + dot).
     */
    private String generateShortName(String name) {
        if (name == null) return null;
        if (name.length() <= 5) return name;
        return name.substring(0, 5) + ".";
    }

    /**
     * Generate a class code such as "12-MAT" or "12-GEN".
     */
    private String generateClassCode(String grade, String courseCode) {
        if (courseCode != null && !courseCode.isBlank()) {
            return grade + "-" + courseCode;
        }
        return grade + "-GEN";
    }

    /**
     * Build a default title from metadata, e.g.
     * "Prova de Exame de Matemática 12ª Classe - Série B - 2024/2025".
     */
    private String buildDefaultTitle(
        OcrMetadata metadata,
        Subject subject,
        Class classEntity,
        SchoolYear schoolYear
    ) {
        StringBuilder title = new StringBuilder("Prova de Exame");

        if (subject != null) {
            title.append(" de ").append(subject.getName());
        }

        if (classEntity != null) {
            title.append(" ").append(classEntity.getGrade()).append("ª Classe");
        }

        if (metadata.variant() != null && metadata.variant().value() != null) {
            title.append(" - Série ").append(metadata.variant().value());
        }

        title
            .append(" - ")
            .append(schoolYear.getStartYear())
            .append("/")
            .append(schoolYear.getEndYear());

        return title.toString();
    }

    // =========================================================================
    // Result Records
    // =========================================================================

    /**
     * Complete result with statement and all related entities.
     * imagesToUpload keeps the raw List type — the element type of
     * OcrResponse.imagesToUpload() is not visible here (TODO confirm).
     */
    public record StatementWithRelations(
        Statement statement,
        SchoolYear schoolYear,
        Course course,
        Subject subject,
        Class classEntity,
        List<Question> questions,
        List<QuestionOption> options,
        List imagesToUpload
    ) {}
}

package ao.creativemode.kixi.service;

import ao.creativemode.kixi.common.exception.ApiException;
import ao.creativemode.kixi.dto.questionimage.QuestionImageRequest;
import ao.creativemode.kixi.dto.questionimage.QuestionImageResponse;
import ao.creativemode.kixi.model.QuestionImage;
import ao.creativemode.kixi.repository.QuestionImageRepository;
import org.springframework.http.codec.multipart.FilePart;
import org.springframework.stereotype.Service;
import 
reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.util.UUID;

/**
 * CRUD and file-upload service for question images.
 *
 * Files are written to the static resources folder and exposed under the
 * public URL prefix "/uploads/questions/". Entities support soft delete
 * (deleted_at), restore, and permanent removal.
 */
@Service
public class QuestionImageService {

    private final QuestionImageRepository repository;

    /**
     * Physical path pointing to the static resources folder for Maven projects.
     * NOTE(review): this path is relative to the working directory and lives
     * under src/main/resources — it only works when running from sources, not
     * from a packaged jar. Consider making it a configuration property.
     */
    private final Path root = Paths.get(
        "services/backend-api/src/main/resources/static/uploads/questions"
    );

    public QuestionImageService(QuestionImageRepository repository) {
        this.repository = repository;
        try {
            // Ensure the physical directory exists on service startup
            Files.createDirectories(root);
        } catch (IOException e) {
            // Preserve the cause so startup failures are diagnosable.
            throw new RuntimeException(
                "Could not initialize folder for upload!",
                e
            );
        }
    }

    /**
     * Retrieves all active (non-deleted) question images.
     */
    public Flux<QuestionImageResponse> findAllActive() {
        return repository.findAllByDeletedAtIsNull()
                .map(this::toResponse);
    }

    /**
     * Retrieves all soft-deleted question images.
     */
    public Flux<QuestionImageResponse> findAllDeleted() {
        return repository.findAllByDeletedAtIsNotNull()
                .map(this::toResponse);
    }

    /**
     * Finds all non-deleted images of a question, ordered by orderIndex.
     */
    public Flux<QuestionImageResponse> findByQuestionId(Long questionId) {
        return repository.findByQuestionIdAndDeletedAtIsNullOrderByOrderIndexAsc(questionId)
                .map(this::toResponse);
    }

    /**
     * Retrieves a single active question image by its ID (404 when absent).
     */
    public Mono<QuestionImageResponse> findByIdActive(Long id) {
        return repository.findByIdAndDeletedAtIsNull(id)
                .switchIfEmpty(Mono.error(ApiException.notFound("Question image not found")))
                .map(this::toResponse);
    }

    /**
     * Creates a new QuestionImage by saving the physical file to the resources
     * folder and generating its public URL.
     */
    public Mono<QuestionImageResponse> createWithFile(
            QuestionImageRequest dto,
            Mono<FilePart> filePartMono
    ) {
        return filePartMono.flatMap(filePart -> {
            // Strip any directory components from the client-supplied name to
            // prevent path traversal, then prefix a UUID to avoid collisions.
            String safeName = Paths.get(filePart.filename())
                    .getFileName()
                    .toString();
            String filename = UUID.randomUUID() + "-" + safeName;
            Path targetPath = this.root.resolve(filename);

            // Transfer the incoming file bytes to the physical target path
            return filePart.transferTo(targetPath)
                    .then(Mono.defer(() -> {
                        QuestionImage entity = new QuestionImage();
                        entity.setQuestionId(dto.questionId());

                        // Public URL path (mapped via WebFlux static resources)
                        entity.setImageUrl("/uploads/questions/" + filename);
                        entity.setCaption(dto.caption());
                        entity.setOrderIndex(dto.orderIndex() != null ? dto.orderIndex() : 0);
                        entity.setDeletedAt(null);

                        return repository.save(entity);
                    }));
        }).map(this::toResponse);
    }

    /**
     * Updates metadata (caption, order) for an existing active question image.
     * Null request fields keep the current values.
     */
    public Mono<QuestionImageResponse> update(Long id, QuestionImageRequest dto) {
        return repository.findByIdAndDeletedAtIsNull(id)
                .switchIfEmpty(Mono.error(ApiException.notFound("Question image not found")))
                .flatMap(entity -> {
                    entity.setCaption(dto.caption() != null ? dto.caption() : entity.getCaption());
                    entity.setOrderIndex(dto.orderIndex() != null ? dto.orderIndex() : entity.getOrderIndex());
                    entity.setUpdatedAt(LocalDateTime.now());

                    return repository.save(entity);
                })
                .map(this::toResponse);
    }

    /**
     * Marks a question image as deleted (soft delete).
     */
    public Mono<Void> softDelete(Long id) {
        return repository.findByIdAndDeletedAtIsNull(id)
                .switchIfEmpty(Mono.error(ApiException.notFound("Question image not found")))
                .flatMap(entity -> {
                    entity.markAsDeleted();
                    return repository.save(entity);
                })
                .then();
    }

    /**
     * Restores a previously soft-deleted question image.
     */
    public Mono<Void> restore(Long id) {
        return repository.findByIdAndDeletedAtIsNotNull(id)
                .switchIfEmpty(Mono.error(ApiException.badRequest("Question image is not deleted")))
                .flatMap(entity -> {
                    entity.restore();
                    return repository.save(entity);
                })
                .then();
    }

    /**
     * Permanently removes a question image from the database.
     * Only images already in the trash can be hard-deleted.
     */
    public Mono<Void> hardDelete(Long id) {
        return repository.findByIdAndDeletedAtIsNotNull(id)
                .switchIfEmpty(Mono.error(ApiException.badRequest("Only deleted images can be permanently removed")))
                .flatMap(repository::delete)
                .then();
    }

    /**
     * Converts the internal entity to a response DTO.
     */
    private QuestionImageResponse toResponse(QuestionImage entity) {
        return new QuestionImageResponse(
                entity.getId(),
                entity.getQuestionId(),
                entity.getImageUrl(),
                entity.getCaption(),
                entity.getOrderIndex(),
                entity.getCreatedAt(),
                entity.getUpdatedAt(),
                entity.getDeletedAt());
    }
}

package ao.creativemode.kixi.service;

import 
ao.creativemode.kixi.common.exception.ApiException; +import ao.creativemode.kixi.dto.simulationanswer.SimulationAnswerRequest; +import ao.creativemode.kixi.dto.simulationanswer.SimulationAnswerResponse; +import ao.creativemode.kixi.model.SimulationAnswer; +import ao.creativemode.kixi.repository.SimulationAnswerRepository; +import java.time.LocalDateTime; +import org.springframework.dao.DataIntegrityViolationException; +import org.springframework.stereotype.Service; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +@Service +public class SimulationAnswerService { + + private final SimulationAnswerRepository repository; + + public SimulationAnswerService(SimulationAnswerRepository repository) { + this.repository = repository; + } + + public Flux findAllActive() { + return repository.findAllByDeletedAtIsNull().map(this::toResponse); + } + + public Flux findAllDeleted() { + return repository.findAllByDeletedAtIsNotNull().map(this::toResponse); + } + + public Mono findByIdActive(Long id) { + return repository + .findByIdAndDeletedAtIsNull(id) + .switchIfEmpty( + Mono.error(ApiException.notFound("Simulation answer not found")) + ) + .map(this::toResponse); + } + + public Mono create( + SimulationAnswerRequest request + ) { + SimulationAnswer answer = new SimulationAnswer(); + answer.setSimulationId(request.simulationId()); + answer.setQuestionId(request.questionId()); + answer.setSelectedOptionId(request.selectedOptionId()); + answer.setAnswerText(request.answerText()); + answer.setAnsweredAt(request.answeredAt()); + + return repository + .save(answer) + .map(this::toResponse) + .onErrorMap(DataIntegrityViolationException.class, e -> + ApiException.conflict( + "A simulation answer with this parameter already exists." 
+ ) + ); + } + + public Mono update( + Long id, + SimulationAnswerRequest request + ) { + return repository + .findByIdAndDeletedAtIsNull(id) + .switchIfEmpty( + Mono.error(ApiException.notFound("Simulation answer not found")) + ) + .flatMap(answer -> { + answer.setSimulationId(request.simulationId()); + answer.setQuestionId(request.questionId()); + answer.setSelectedOptionId(request.selectedOptionId()); + answer.setAnswerText(request.answerText()); + answer.setAnsweredAt(request.answeredAt()); + answer.setUpdatedAt(LocalDateTime.now()); + + return repository.save(answer); + }) + .map(this::toResponse) + .onErrorMap(DataIntegrityViolationException.class, e -> + ApiException.conflict( + "A simulation answer with this parameter already exists." + ) + ); + } + + public Mono softDelete(Long id) { + return repository + .findByIdAndDeletedAtIsNull(id) + .switchIfEmpty( + Mono.error(ApiException.notFound("Simulation answer not found")) + ) + .flatMap(entity -> { + entity.markAsDeleted(); + return repository.save(entity); + }) + .then(); + } + + public Mono restore(Long id) { + return repository + .findByIdAndDeletedAtIsNotNull(id) + .switchIfEmpty( + Mono.error( + ApiException.badRequest("Simulation answer is not deleted") + ) + ) + .flatMap(entity -> { + entity.restore(); + return repository.save(entity); + }) + .then(); + } + + public Mono hardDelete(Long id) { + return repository + .findByIdAndDeletedAtIsNotNull(id) + .switchIfEmpty( + Mono.error( + ApiException.badRequest( + "Only deleted simulation answers can be permanently removed" + ) + ) + ) + .flatMap(repository::delete) + .then(); + } + + private SimulationAnswerResponse toResponse(SimulationAnswer entity) { + return new SimulationAnswerResponse( + entity.getId(), + entity.getSimulationId(), + entity.getQuestionId(), + entity.getSelectedOptionId(), + entity.getAnswerText(), + entity.getScoreObtained(), + entity.getIsCorrect(), + entity.getAnsweredAt(), + entity.getCreatedAt(), + entity.getUpdatedAt(), + 
entity.getDeletedAt() + ); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/SimulationService.java b/services/backend-api/src/main/java/ao/creativemode/kixi/service/SimulationService.java new file mode 100644 index 0000000..76880a9 --- /dev/null +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/service/SimulationService.java @@ -0,0 +1,198 @@ +package ao.creativemode.kixi.service; + +import java.time.LocalDateTime; + +import org.springframework.stereotype.Service; + +import ao.creativemode.kixi.common.exception.ApiException; +import ao.creativemode.kixi.dto.accounts.AccountBasicResponse; +import ao.creativemode.kixi.dto.schoolyears.SchoolYearResponse; +import ao.creativemode.kixi.dto.simulation.SimulationRequest; +import ao.creativemode.kixi.dto.simulation.SimulationResponse; +import ao.creativemode.kixi.dto.statement.StatementBasicResponse; +import ao.creativemode.kixi.model.*; +import ao.creativemode.kixi.repository.*; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +@Service +public class SimulationService { + + private final SimulationRepository repository; + private final AccountRepository accountRepository; + private final SchoolYearRepository schoolYearRepository; + private final StatementRepository statementRepository; + + public SimulationService( + SimulationRepository repository, + AccountRepository accountRepository, + SchoolYearRepository schoolYearRepository, + StatementRepository statementRepository + ) { + this.repository = repository; + this.accountRepository = accountRepository; + this.schoolYearRepository = schoolYearRepository; + this.statementRepository = statementRepository; + } + + public Flux findAllActive() { + return repository.findByDeletedAtIsNull() + .flatMap(this::toResponse); + } + + public Flux findAllTrashed() { + return repository.findByDeletedAtIsNotNull() + .flatMap(this::toResponse); + } + + public Mono findById(Long id) { + return 
repository.findByIdAndDeletedAtIsNull(id) + .flatMap(this::toResponse); + } + + public Mono create(SimulationRequest dto) { + return accountRepository.findById(dto.accountId()) + .switchIfEmpty(Mono.error(ApiException.notFound("Account not found"))) + .then(Mono.defer(() -> { + if (dto.schoolYearId() != null) { + return schoolYearRepository.findById(dto.schoolYearId()) + .switchIfEmpty(Mono.error(ApiException.notFound("SchoolYear not found"))) + .then(Mono.just(true)); + } + return Mono.just(true); + })) + .then(Mono.defer(() -> { + Simulation simulation = new Simulation(); + simulation.setAccountId(dto.accountId()); + simulation.setSchoolYearId(dto.schoolYearId()); + simulation.setStatementId(dto.statementId()); + simulation.setStartedAt(dto.startedAt() != null ? dto.startedAt() : LocalDateTime.now()); + simulation.setStatus(SimulationStatus.IN_PROGRESS); + return repository.save(simulation); + })) + .flatMap(this::toResponse); + } + + public Mono update(Long id, SimulationRequest dto) { + return repository.findByIdAndDeletedAtIsNull(id) + .switchIfEmpty(Mono.error(ApiException.notFound("Simulation not found!"))) + .flatMap(simulation -> { + if (!SimulationStatus.IN_PROGRESS.equals(simulation.getStatus())) { + return Mono.error(ApiException.badRequest("Simulation cannot be updated")); + } + + if (dto.status() != null && dto.status() != SimulationStatus.FINISHED + && dto.status() != SimulationStatus.CANCELLED) { + return Mono.error(ApiException.badRequest("Invalid status")); + } + + if (dto.status() == SimulationStatus.FINISHED) { + if (dto.finishedAt() == null || dto.timeSpentSeconds() == null) { + return Mono.error(ApiException.badRequest( + "finishedAt and timeSpentSeconds are required" + )); + } + simulation.setFinishedAt(dto.finishedAt()); + simulation.setTimeSpentSeconds(dto.timeSpentSeconds()); + simulation.setFinalScore(dto.finalScore()); + } + + if (dto.status() != null) { + simulation.setStatus(dto.status()); + } + 
simulation.setUpdatedAt(LocalDateTime.now()); + + return repository.save(simulation); + }) + .flatMap(this::toResponse); + } + + public Mono softDelete(Long id) { + return repository.findByIdAndDeletedAtIsNull(id) + .switchIfEmpty(Mono.error(ApiException.notFound("Simulation not found!"))) + .flatMap(simulation -> { + simulation.markAsDelete(); + return repository.save(simulation); + }) + .then(); + } + + public Mono restore(Long id) { + return repository.findById(id) + .switchIfEmpty(Mono.error(ApiException.notFound("Simulation not found"))) + .flatMap(simulation -> { + if (simulation.getDeletedAt() == null) { + return Mono.error(ApiException.conflict("Simulation is not deleted")); + } + simulation.restore(); + return repository.save(simulation); + }) + .then(); + } + + public Mono hardDelete(Long id) { + return repository.findByIdAndDeletedAtIsNotNull(id) + .switchIfEmpty(Mono.error(ApiException.notFound("Simulation not found or not in trash"))) + .flatMap(repository::delete).then(); + } + + private Mono toResponse(Simulation simulation) { + Mono accountMono = accountRepository.findById(simulation.getAccountId()) + .map(this::toAccountResponse) + .switchIfEmpty(Mono.just(new AccountBasicResponse(simulation.getAccountId(), null, null))); + + Mono statementMono = simulation.getStatementId() != null + ? statementRepository.findById(simulation.getStatementId()) + .map(this::toStatementResponse) + .switchIfEmpty(Mono.just(new StatementBasicResponse(null, null, null, null, null, null))) + : Mono.just(new StatementBasicResponse(null, null, null, null, null, null)); + + Mono schoolYearMono = simulation.getSchoolYearId() != null + ? 
schoolYearRepository.findById(simulation.getSchoolYearId()) + .map(this::toSchoolYearResponse) + .switchIfEmpty(Mono.just(new SchoolYearResponse(null, null, null, null, null, null))) + : Mono.just(new SchoolYearResponse(null, null, null, null, null, null)); + + return Mono.zip(accountMono, statementMono, schoolYearMono) + .map(tuple -> new SimulationResponse( + simulation.getId(), + tuple.getT1(), + tuple.getT2(), + tuple.getT3(), + simulation.getStartedAt(), + simulation.getFinishedAt(), + simulation.getTimeSpentSeconds(), + simulation.getFinalScore(), + simulation.getStatus(), + simulation.getCreatedAt(), + simulation.getUpdatedAt(), + simulation.getDeletedAt() + )); + } + + private AccountBasicResponse toAccountResponse(Account account) { + return new AccountBasicResponse(account.getId(), account.getUsername(), account.getEmail()); + } + + private StatementBasicResponse toStatementResponse(Statement statement) { + return new StatementBasicResponse( + statement.getId(), + statement.getExamType(), + statement.getVariant(), + statement.getTitle(), + statement.getDurationMinutes(), + statement.getTotalMaxScore() + ); + } + + private SchoolYearResponse toSchoolYearResponse(SchoolYear schoolYear) { + return new SchoolYearResponse( + schoolYear.getId(), + schoolYear.getStartYear(), + schoolYear.getEndYear(), + schoolYear.getCreatedAt(), + schoolYear.getUpdatedAt(), + schoolYear.getDeletedAt() + ); + } +} diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/StatementService.java b/services/backend-api/src/main/java/ao/creativemode/kixi/service/StatementService.java index a3f033f..2b53487 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/service/StatementService.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/service/StatementService.java @@ -1,245 +1,632 @@ package ao.creativemode.kixi.service; +import ao.creativemode.kixi.client.OcrServiceClient; import ao.creativemode.kixi.common.exception.ApiException; 
-import ao.creativemode.kixi.dto.schoolyears.StatementRequest; +import ao.creativemode.kixi.dto.ocr.OcrResponse; +import ao.creativemode.kixi.dto.ocr.OcrResponse.ExtractedOption; +import ao.creativemode.kixi.dto.ocr.OcrResponse.ExtractedQuestion; +import ao.creativemode.kixi.dto.ocr.OcrResponse.OcrMetadata; +import ao.creativemode.kixi.model.Question; +import ao.creativemode.kixi.model.QuestionOption; import ao.creativemode.kixi.model.Statement; +import ao.creativemode.kixi.repository.QuestionOptionRepository; +import ao.creativemode.kixi.repository.QuestionRepository; import ao.creativemode.kixi.repository.StatementRepository; -import ao.creativemode.kixi.dto.schoolyears.StatementResponse; -import org.springframework.stereotype.Service; -import reactor.core.publisher.Mono; - import java.time.LocalDateTime; -import java.util.ArrayList; import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.codec.multipart.FilePart; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +/** + * Service for managing Statement entities and OCR integration. + * + * Provides business logic for: + * - CRUD operations on statements + * - OCR-based statement creation from images + * - Mapping OCR results to domain entities + * - Managing questions and options + * + * For full OCR processing with entity lookup/creation, use OcrPersistenceService. 
+ */ @Service public class StatementService { - private final StatementRepository repository; - public StatementService(StatementRepository repository) { - this.repository = repository; + private static final Logger log = LoggerFactory.getLogger( + StatementService.class + ); + + private static final double LOW_CONFIDENCE_THRESHOLD = 0.8; + private static final double MIN_CONFIDENCE_THRESHOLD = 0.5; + + private final StatementRepository statementRepository; + private final QuestionRepository questionRepository; + private final QuestionOptionRepository optionRepository; + private final OcrServiceClient ocrServiceClient; + + public StatementService( + StatementRepository statementRepository, + QuestionRepository questionRepository, + QuestionOptionRepository optionRepository, + OcrServiceClient ocrServiceClient + ) { + this.statementRepository = statementRepository; + this.questionRepository = questionRepository; + this.optionRepository = optionRepository; + this.ocrServiceClient = ocrServiceClient; } - public Mono> listAllActive() { - return repository.findByDeletedAtIsNull() - .map(this::toResponse) - .collectList() - .onErrorResume(e -> Mono.error( - ApiException.badRequest("Error listing statements: " + e.getMessage()) - )); + // ========================================================================= + // OCR Integration (Legacy - for simple cases without entity lookup) + // ========================================================================= + + /** + * Create a statement from uploaded images using OCR. + * + * Note: For full entity lookup/creation (SchoolYear, Course, Subject, Class), + * use OcrPersistenceService.processAndPersist() instead. 
+ * + * @param files List of uploaded image files + * @param createdBy ID of the user creating the statement + * @return Mono containing the created statement with questions + */ + @Transactional + public Mono createFromOcr( + List files, + Long createdBy + ) { + log.info( + "Creating statement from OCR: {} file(s), createdBy={}", + files.size(), + createdBy + ); + + return ocrServiceClient + .extractText(files) + .flatMap(ocrResponse -> { + if (ocrResponse.isError()) { + log.error( + "OCR extraction failed: {}", + ocrResponse.errorMessage() + ); + return Mono.error( + ApiException.badRequest( + "OCR extraction failed: " + + ocrResponse.errorMessage() + ) + ); + } + + log.info( + "OCR extraction successful: requestId={}, confidence={}, questions={}", + ocrResponse.requestId(), + ocrResponse.overallConfidence(), + ocrResponse.questions() != null + ? ocrResponse.questions().size() + : 0 + ); + + return createStatementFromOcrResponse(ocrResponse, createdBy); + }) + .doOnSuccess(result -> + log.info( + "Statement created from OCR: statementId={}, questions={}", + result.statement().getId(), + result.questions().size() + ) + ) + .doOnError(error -> + log.error("Failed to create statement from OCR", error) + ); } - public Mono> listTrashed() { - return repository.findByDeletedAtIsNotNull() - .map(this::toResponse) - .collectList() - .onErrorResume(e -> Mono.error( - ApiException.badRequest("Error listing deleted statements: " + e.getMessage()) - )); + /** + * Create a statement from an OCR response. 
+ * + * @param ocrResponse The OCR response containing extracted data + * @param createdBy ID of the user creating the statement + * @return Mono containing the created statement with questions + */ + @Transactional + public Mono createStatementFromOcrResponse( + OcrResponse ocrResponse, + Long createdBy + ) { + // Create and populate statement from metadata + Statement statement = mapMetadataToStatement( + ocrResponse.metadata(), + ocrResponse + ); + statement.setCreatedBy(createdBy); + statement.setOcrMetadata( + ocrResponse.requestId(), + ocrResponse.overallConfidence(), + ocrResponse.needsReview() + ); + statement.setSource("ocr"); + + // Calculate total max score from questions + if (ocrResponse.questions() != null) { + double totalScore = ocrResponse + .questions() + .stream() + .filter(q -> q.getCotacaoValue() != null) + .mapToDouble(ExtractedQuestion::getCotacaoValue) + .sum(); + if (totalScore > 0) { + statement.setTotalMaxScore(totalScore); + } + } + + // Save statement first + return statementRepository + .save(statement) + .flatMap(savedStatement -> { + if ( + ocrResponse.questions() == null || + ocrResponse.questions().isEmpty() + ) { + return Mono.just( + new StatementWithQuestions( + savedStatement, + List.of(), + List.of() + ) + ); + } + + // Create and save questions + return createQuestionsFromOcr( + savedStatement.getId(), + ocrResponse.questions() + ) + .collectList() + .flatMap(savedQuestions -> { + // Collect all question IDs + List questionIds = savedQuestions + .stream() + .map(Question::getId) + .toList(); + + // Load all options for these questions + return optionRepository + .findAllByQuestionIds(questionIds) + .collectList() + .map(options -> + new StatementWithQuestions( + savedStatement, + savedQuestions, + options + ) + ); + }); + }); } - public Mono getById(Long id) { - if (id == null || id <= 0) { - return Mono.error(ApiException.badRequest("Statement ID is required and must be greater than zero")); + /** + * Map OCR metadata to 
Statement entity. + */ + private Statement mapMetadataToStatement( + OcrMetadata metadata, + OcrResponse ocrResponse + ) { + Statement statement = new Statement(); + + if (metadata != null) { + // Title + if (metadata.title() != null && metadata.title().value() != null) { + statement.setTitle(metadata.title().value()); + } else { + statement.setTitle( + "Imported Statement - " + LocalDateTime.now() + ); + } + + // Exam type + if ( + metadata.examType() != null && + metadata.examType().value() != null + ) { + statement.setExamType(metadata.examType().value()); + } else { + statement.setExamType("Prova de Exame"); + } + + // Duration + if ( + metadata.durationMinutes() != null && + metadata.durationMinutes().value() != null + ) { + statement.setDurationMinutes( + metadata.durationMinutes().value() + ); + } + + // Variant + if ( + metadata.variant() != null && metadata.variant().value() != null + ) { + statement.setVariant(metadata.variant().value()); + } + + // Instructions + if ( + metadata.instructions() != null && + metadata.instructions().value() != null + ) { + statement.setInstructions(metadata.instructions().value()); + } + + // Total max score from metadata + if (metadata.getTotalMaxScoreValue() != null) { + statement.setTotalMaxScore(metadata.getTotalMaxScoreValue()); + } + + // Note: schoolYearId, termId, subjectId, classId, courseId + // are resolved in OcrPersistenceService which does the full lookup } - return repository.findByIdAndDeletedAtIsNull(id) - .switchIfEmpty(Mono.error( - ApiException.notFound("Statement with ID " + id + " not found") - )) - .map(this::toResponse); + // Set OCR-specific fields + statement.setVisible(false); // Require manual review before publishing + statement.setNeedsReview( + ocrResponse.needsReview() || + (ocrResponse.overallConfidence() != null && + ocrResponse.overallConfidence() < LOW_CONFIDENCE_THRESHOLD) + ); + + return statement; + } + + /** + * Create questions from OCR extracted questions. 
+ */ + private Flux createQuestionsFromOcr( + Long statementId, + List extractedQuestions + ) { + return Flux.fromIterable(extractedQuestions) + .index() + .flatMap(tuple -> { + int index = tuple.getT1().intValue(); + ExtractedQuestion extracted = tuple.getT2(); + + Question question = mapExtractedToQuestion( + statementId, + extracted, + index + ); + + return questionRepository + .save(question) + .flatMap(savedQuestion -> { + // Create options if this is a multiple choice question + if ( + extracted.options() != null && + !extracted.options().isEmpty() + ) { + return createOptionsFromOcr( + savedQuestion.getId(), + extracted.options() + ).then(Mono.just(savedQuestion)); + } + return Mono.just(savedQuestion); + }); + }); } - public Mono update(Long id, StatementRequest request) { - if (id == null || id <= 0) { - return Mono.error(ApiException.badRequest("Statement ID is required and must be greater than zero")); + /** + * Map extracted question to Question entity. + */ + private Question mapExtractedToQuestion( + Long statementId, + ExtractedQuestion extracted, + int orderIndex + ) { + Question question = new Question(); + question.setStatementId(statementId); + + // Parse number (might be string like "1", "2a", etc.) + try { + String numStr = + extracted.number() != null + ? extracted.number().replaceAll("[^0-9]", "") + : ""; + question.setNumber( + numStr.isEmpty() ? 
orderIndex + 1 : Integer.parseInt(numStr) + ); + } catch (NumberFormatException e) { + question.setNumber(orderIndex + 1); } - return validateRequest(request) - .then(repository.findByIdAndDeletedAtIsNull(id)) - .switchIfEmpty(Mono.error( - ApiException.notFound("Statement with ID " + id + " not found for update") - )) - .flatMap(statement -> { - statement.setTitle(request.getTitle()); - statement.setExamType(request.getExamType()); - statement.setDurationMinutes(request.getDurationMinutes()); - statement.setVariant(request.getVariant()); - statement.setInstructions(request.getInstructions()); - statement.setTotalMaxScore(request.getTotalMaxScore()); - statement.setSchoolYearId(request.getSchoolYearId()); - statement.setTermId(request.getTermId()); - statement.setSubjectId(request.getSubjectId()); - statement.setClassId(request.getClassId()); - statement.setCourseId(request.getCourseId()); - statement.setVisible(request.getVisible()); - statement.setUpdatedAt(LocalDateTime.now()); - return repository.save(statement); - }) - .map(this::toResponse) - .onErrorResume(ApiException.class, Mono::error) - .onErrorResume(e -> Mono.error( - ApiException.badRequest("Error updating statement: " + e.getMessage()) - )); - } + question.setOrderIndex(orderIndex); - public Mono softDelete(Long id) { - if (id == null || id <= 0) { - return Mono.error(ApiException.badRequest("Statement ID is required and must be greater than zero")); + // Text + question.setText(extracted.getTextValue()); + + // Question type - map from Portuguese to database format + String type = extracted.getTypeValue(); + question.setQuestionType(mapQuestionType(type)); + + // Cotação (max score) + if (extracted.getCotacaoValue() != null) { + question.setMaxScore(extracted.getCotacaoValue()); } - return repository.findByIdAndDeletedAtIsNull(id) - .switchIfEmpty(Mono.error( - ApiException.notFound("Statement with ID " + id + " not found for deletion") - )) - .flatMap(statement -> { - 
statement.setDeletedAt(LocalDateTime.now()); - return repository.save(statement); - }) - .then() - .onErrorResume(ApiException.class, Mono::error) - .onErrorResume(e -> Mono.error( - ApiException.badRequest("Error deleting statement: " + e.getMessage()) - )); + // OCR metadata + question.setOcrConfidence(extracted.confidence()); + question.setPageIndex(extracted.pageIndex()); + + // Mark for review if low confidence + question.setNeedsReview( + extracted.confidence() != null && + extracted.confidence() < LOW_CONFIDENCE_THRESHOLD + ); + + return question; } - public Mono restore(Long id) { - if (id == null || id <= 0) { - return Mono.error(ApiException.badRequest("Statement ID is required and must be greater than zero")); + /** + * Map question type from Portuguese to database format. + */ + private String mapQuestionType(String type) { + if (type == null) { + return "unknown"; } + return switch (type.toLowerCase()) { + case "dissertativa" -> "development"; + case "multipla_escolha" -> "multiple_choice"; + default -> type; + }; + } - return repository.findById(id) - .switchIfEmpty(Mono.error( - ApiException.notFound("Statement with ID " + id + " not found") - )) - .filter(statement -> statement.getDeletedAt() != null) - .switchIfEmpty(Mono.error( - ApiException.badRequest("Statement with ID " + id + " is not deleted and cannot be restored") - )) - .flatMap(statement -> { - statement.setDeletedAt(null); - return repository.save(statement); - }) - .then() - .onErrorResume(ApiException.class, Mono::error) - .onErrorResume(e -> Mono.error( - ApiException.badRequest("Error restoring statement: " + e.getMessage()) - )); + /** + * Create options from OCR extracted options. 
+ */ + private Flux createOptionsFromOcr( + Long questionId, + List extractedOptions + ) { + return Flux.fromIterable(extractedOptions) + .index() + .flatMap(tuple -> { + int index = tuple.getT1().intValue(); + ExtractedOption extracted = tuple.getT2(); + + QuestionOption option = new QuestionOption(); + option.setQuestionId(questionId); + option.setOptionLabel(extracted.optionLabel()); + option.setOptionText(extracted.optionText()); + option.setOrderIndex(index); + option.setOcrConfidence(extracted.confidence()); + option.setIsCorrect(false); // OCR cannot determine correct answer + + return optionRepository.save(option); + }); } - public Mono hardDelete(Long id) { - if (id == null || id <= 0) { - return Mono.error(ApiException.badRequest("Statement ID is required and must be greater than zero")); - } + // ========================================================================= + // CRUD Operations + // ========================================================================= - return repository.findById(id) - .switchIfEmpty(Mono.error( - ApiException.notFound("Statement with ID " + id + " not found for permanent deletion") - )) - .flatMap(statement -> repository.deleteById(id)) - .onErrorResume(ApiException.class, Mono::error) - .onErrorResume(e -> Mono.error( - ApiException.badRequest("Error permanently deleting statement: " + e.getMessage()) - )); - } - - public Mono create(StatementRequest request) { - return validateRequest(request) - .then(Mono.defer(() -> { - Statement statement = new Statement(); - statement.setTitle(request.getTitle()); - statement.setExamType(request.getExamType()); - statement.setDurationMinutes(request.getDurationMinutes()); - statement.setVariant(request.getVariant()); - statement.setInstructions(request.getInstructions()); - statement.setTotalMaxScore(request.getTotalMaxScore()); - statement.setSchoolYearId(request.getSchoolYearId()); - statement.setTermId(request.getTermId()); - statement.setSubjectId(request.getSubjectId()); - 
statement.setClassId(request.getClassId()); - statement.setCourseId(request.getCourseId()); - statement.setVisible(request.getVisible() != null ? request.getVisible() : false); - statement.setCreatedAt(LocalDateTime.now()); - - return repository.save(statement); - })) - .map(this::toResponse) - .onErrorResume(ApiException.class, Mono::error) - .onErrorResume(e -> Mono.error( - ApiException.badRequest("Error creating statement: " + e.getMessage()) - )); - } - - private Mono validateRequest(StatementRequest request) { - List errors = new ArrayList<>(); - - if (request == null) { - return Mono.error(ApiException.badRequest("Statement data is required")); - } + /** + * Find all active (non-deleted) statements. + */ + public Flux findAllActive() { + return statementRepository.findAllByDeletedAtIsNull(); + } - if (request.getTitle() == null || request.getTitle().isBlank()) { - errors.add("Title is required"); - } else if (request.getTitle().length() < 3) { - errors.add("Title must have at least 3 characters"); - } else if (request.getTitle().length() > 255) { - errors.add("Title must have at most 255 characters"); - } + /** + * Find all soft-deleted statements. + */ + public Flux findAllDeleted() { + return statementRepository.findAllByDeletedAtIsNotNull(); + } - if (request.getExamType() == null || request.getExamType().isBlank()) { - errors.add("Exam type is required"); - } + /** + * Find a statement by ID. + */ + public Mono findById(Long id) { + return statementRepository + .findByIdAndDeletedAtIsNull(id) + .switchIfEmpty( + Mono.error(ApiException.notFound("Statement not found: " + id)) + ); + } - if (request.getDurationMinutes() != null && request.getDurationMinutes() <= 0) { - errors.add("Duration must be greater than zero"); - } + /** + * Find a statement with its questions. 
+ */ + public Mono findByIdWithQuestions(Long id) { + return findById(id).flatMap(statement -> + questionRepository + .findAllByStatementIdOrderByOrderIndex(statement.getId()) + .collectList() + .flatMap(questions -> { + if (questions.isEmpty()) { + return Mono.just( + new StatementWithQuestions( + statement, + List.of(), + List.of() + ) + ); + } + + List questionIds = questions + .stream() + .map(Question::getId) + .toList(); + + return optionRepository + .findAllByQuestionIds(questionIds) + .collectList() + .map(options -> + new StatementWithQuestions( + statement, + questions, + options + ) + ); + }) + ); + } - if (request.getTotalMaxScore() != null && request.getTotalMaxScore() < 0) { - errors.add("Maximum score cannot be negative"); - } + /** + * Find statements needing review. + */ + public Flux findNeedingReview() { + return statementRepository.findAllByNeedsReviewTrueAndDeletedAtIsNull(); + } - if (request.getSchoolYearId() == null) { - errors.add("School year is required"); - } + /** + * Find statements created via OCR. + */ + public Flux findFromOcr() { + return statementRepository.findAllFromOcr(); + } - if (request.getTermId() == null) { - errors.add("Term is required"); - } + /** + * Find statements by school year. + */ + public Flux findBySchoolYear(Long schoolYearId) { + return statementRepository.findAllBySchoolYearIdAndDeletedAtIsNull( + schoolYearId + ); + } - if (request.getSubjectId() == null) { - errors.add("Subject is required"); - } + /** + * Find statements by subject. + */ + public Flux findBySubject(Long subjectId) { + return statementRepository.findAllBySubjectIdAndDeletedAtIsNull( + subjectId + ); + } - if (request.getClassId() == null) { - errors.add("Class is required"); - } + /** + * Search statements by title. 
+ */ + public Flux searchByTitle(String searchTerm) { + return statementRepository.searchByTitle(searchTerm); + } - if (!errors.isEmpty()) { - String errorMessage = String.join("; ", errors); - return Mono.error(ApiException.badRequest("Validation errors: " + errorMessage)); - } + /** + * Save a statement. + */ + public Mono save(Statement statement) { + return statementRepository.save(statement); + } - return Mono.empty(); + /** + * Soft delete a statement. + */ + @Transactional + public Mono softDelete(Long id) { + return findById(id) + .flatMap(statement -> { + statement.markAsDeleted(); + return statementRepository.save(statement); + }) + .then(); + } + + /** + * Restore a soft-deleted statement. + */ + @Transactional + public Mono restore(Long id) { + return statementRepository + .findByIdAndDeletedAtIsNotNull(id) + .switchIfEmpty( + Mono.error( + ApiException.notFound("Deleted statement not found: " + id) + ) + ) + .flatMap(statement -> { + statement.restore(); + return statementRepository.save(statement); + }) + .then(); } + /** + * Hard delete a statement and its questions/options. + */ + @Transactional + public Mono hardDelete(Long id) { + return findById(id).flatMap(statement -> + questionRepository + .findAllByStatementIdOrderByOrderIndex(statement.getId()) + .flatMap(question -> + optionRepository + .softDeleteAllByQuestionId(question.getId()) + .then(questionRepository.delete(question)) + ) + .then(statementRepository.delete(statement)) + ); + } + /** + * Approve a statement review. 
+ */ + @Transactional + public Mono approveReview(Long id) { + return findById(id).flatMap(statement -> { + statement.approveReview(); + return statementRepository.save(statement); + }); + } - private StatementResponse toResponse(Statement statement) { - StatementResponse response = new StatementResponse(); - response.setId(statement.getId()); - response.setExamType(statement.getExamType()); - response.setDurationMinutes(statement.getDurationMinutes()); - response.setVariant(statement.getVariant()); - response.setTitle(statement.getTitle()); - response.setInstructions(statement.getInstructions()); - response.setTotalMaxScore(statement.getTotalMaxScore()); - response.setSchoolYearId(statement.getSchoolYearId()); - response.setTermId(statement.getTermId()); - response.setSubjectId(statement.getSubjectId()); - response.setClassId(statement.getClassId()); - response.setCourseId(statement.getCourseId()); - response.setVisible(statement.getVisible()); - response.setCreatedAt(statement.getCreatedAt()); - response.setUpdatedAt(statement.getUpdatedAt()); - return response; + /** + * Set statement visibility. + */ + @Transactional + public Mono setVisible(Long id, boolean visible) { + return findById(id).flatMap(statement -> { + statement.setVisible(visible); + return statementRepository.save(statement); + }); } + + // ========================================================================= + // Statistics + // ========================================================================= + + /** + * Count active statements. + */ + public Mono countActive() { + return statementRepository.countByDeletedAtIsNull(); + } + + /** + * Count statements needing review. + */ + public Mono countNeedingReview() { + return statementRepository.countByNeedsReviewTrueAndDeletedAtIsNull(); + } + + /** + * Count statements by source. 
+ */ + public Mono countBySource(String source) { + return statementRepository.countBySourceAndDeletedAtIsNull(source); + } + + // ========================================================================= + // Result Records + // ========================================================================= + + /** + * Statement with its questions and options. + */ + public record StatementWithQuestions( + Statement statement, + List questions, + List options + ) {} } diff --git a/services/backend-api/src/main/java/ao/creativemode/kixi/service/SubjectService.java b/services/backend-api/src/main/java/ao/creativemode/kixi/service/SubjectService.java index 6992959..39f964a 100644 --- a/services/backend-api/src/main/java/ao/creativemode/kixi/service/SubjectService.java +++ b/services/backend-api/src/main/java/ao/creativemode/kixi/service/SubjectService.java @@ -1,106 +1,132 @@ -package ao.creativemode.kixi.service; - -import ao.creativemode.kixi.dto.subject.SubjectRequest; -import ao.creativemode.kixi.dto.subject.SubjectResponse; -import ao.creativemode.kixi.model.Subject; -import ao.creativemode.kixi.common.exception.ApiException; -import ao.creativemode.kixi.repository.SubjectRepository; -import org.springframework.dao.DataIntegrityViolationException; -import org.springframework.stereotype.Service; -import reactor.core.publisher.Flux; -import reactor.core.publisher.Mono; - - - -@Service -public class SubjectService { - - private final SubjectRepository repository; - - public SubjectService(SubjectRepository repository){ - this.repository = repository; - } - - public Flux findAllActive(){ - return repository.findAllByDeletedAtIsNull().map(this::toResponse); - } - - public Flux findAllDeleted(){ - return repository.findAllByDeletedAtIsNotNull().map(this::toResponse); - } - - public Mono findByCodeActive(Long id){ - return repository.findByIdAndDeletedAtIsNull(id) - .switchIfEmpty(Mono.error(ApiException.notFound("Subject not found"))) - .map(this::toResponse); - } - - 
public Mono<SubjectResponse> create(SubjectRequest data){ - Subject newSubject = new Subject(); - newSubject.setCode(data.code()); - newSubject.setName(data.name()); - newSubject.setShortName(data.shortName()); - newSubject.setDeletedAt(null); - - return repository.save(newSubject) - .map(this::toResponse) - .onErrorMap(DataIntegrityViolationException.class, - e-> ApiException.conflict("Subject with code " + data.code() + " already exists.")); - } - - public Mono<SubjectResponse> update(Long id, SubjectRequest data){ - return repository.findByIdAndDeletedAtIsNull(id) - .switchIfEmpty(Mono.error(ApiException.notFound("Subject not found"))) - .flatMap(subject -> { - String newCode = data.code(); - String newName = data.name(); - String newShortName = data.shortName(); - - - subject.setCode(newCode); - subject.setName(newName); - subject.setShortName(newShortName); - return repository.save(subject) - .onErrorMap(DataIntegrityViolationException.class, - e -> ApiException.conflict("Another subject with this name already exists, please choose a different name.")); - }).map(this::toResponse); - } - - public Mono<Void> softDelete(Long id){ - return repository.findByIdAndDeletedAtIsNull(id) - .switchIfEmpty(Mono.error(ApiException.notFound("Subject not found"))) - .flatMap(subject -> { - subject.markAsDeleted(); - return repository.save(subject); - }).then(); - } - - public Mono<Void> restore(Long id){ - return repository.findByIdAndDeletedAtIsNotNull(id) - .switchIfEmpty(Mono.error(ApiException.notFound("Subject not found"))) - .flatMap(subject -> { - subject.restore(); - return repository.save(subject); - }).then(); - } - - public Mono<Void> hardDelete(Long id){ - return repository.findByIdAndDeletedAtIsNotNull(id) - .switchIfEmpty(Mono.error(ApiException.notFound("Only deleted subject can be permanently removed"))) - .flatMap(repository::delete) - .then(); - } - - - - private SubjectResponse toResponse(Subject entity) { - return new SubjectResponse( - entity.getId(), - entity.getCode(), - entity.getName(), - 
entity.getShortName(), - entity.getCreatedAt(), - entity.getUpdatedAt(), - entity.getDeletedAt()); - } -} +package ao.creativemode.kixi.service; + +import ao.creativemode.kixi.common.exception.ApiException; +import ao.creativemode.kixi.dto.subject.SubjectRequest; +import ao.creativemode.kixi.dto.subject.SubjectResponse; +import ao.creativemode.kixi.model.Subject; +import ao.creativemode.kixi.repository.SubjectRepository; +import org.springframework.dao.DataIntegrityViolationException; +import org.springframework.stereotype.Service; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +@Service +public class SubjectService { + + private final SubjectRepository repository; + + public SubjectService(SubjectRepository repository) { + this.repository = repository; + } + + public Flux<SubjectResponse> findAllActive() { + return repository.findAllByDeletedAtIsNull().map(this::toResponse); + } + + public Flux<SubjectResponse> findAllDeleted() { + return repository.findAllByDeletedAtIsNotNull().map(this::toResponse); + } + + public Mono<SubjectResponse> findByCodeActive(String code) { + return repository + .findByCodeAndDeletedAtIsNull(code) + .switchIfEmpty( + Mono.error(ApiException.notFound("Subject not found")) + ) + .map(this::toResponse); + } + + public Mono<SubjectResponse> create(SubjectRequest data) { + Subject newSubject = new Subject(); + newSubject.setCode(data.code()); + newSubject.setName(data.name()); + newSubject.setShortName(data.shortName()); + newSubject.setDeletedAt(null); + + return repository + .save(newSubject) + .map(this::toResponse) + .onErrorMap(DataIntegrityViolationException.class, e -> + ApiException.conflict( + "Subject with code " + data.code() + " already exists." 
+ ) + ); + } + + public Mono<SubjectResponse> update(String code, SubjectRequest data) { + return repository + .findByCodeAndDeletedAtIsNull(code) + .switchIfEmpty( + Mono.error(ApiException.notFound("Subject not found")) + ) + .flatMap(subject -> { + String newCode = data.code(); + String newName = data.name(); + String newShortName = data.shortName(); + + subject.setCode(newCode); + subject.setName(newName); + subject.setShortName(newShortName); + return repository + .save(subject) + .onErrorMap(DataIntegrityViolationException.class, e -> + ApiException.conflict( + "Another subject with this code already exists, please choose a different code." + ) + ); + }) + .map(this::toResponse); + } + + public Mono<Void> softDelete(String code) { + return repository + .findByCodeAndDeletedAtIsNull(code) + .switchIfEmpty( + Mono.error(ApiException.notFound("Subject not found")) + ) + .flatMap(subject -> { + subject.markAsDeleted(); + return repository.save(subject); + }) + .then(); + } + + public Mono<Void> restore(String code) { + return repository + .findByCodeAndDeletedAtIsNotNull(code) + .switchIfEmpty( + Mono.error(ApiException.notFound("Subject not found")) + ) + .flatMap(subject -> { + subject.restore(); + return repository.save(subject); + }) + .then(); + } + + public Mono<Void> hardDelete(String code) { + return repository + .findByCodeAndDeletedAtIsNotNull(code) + .switchIfEmpty( + Mono.error( + ApiException.notFound( + "Only deleted subject can be permanently removed" + ) + ) + ) + .flatMap(repository::delete) + .then(); + } + + private SubjectResponse toResponse(Subject entity) { + return new SubjectResponse( + entity.getId(), + entity.getCode(), + entity.getName(), + entity.getShortName(), + entity.getCreatedAt(), + entity.getUpdatedAt(), + entity.getDeletedAt() + ); + } +} diff --git a/services/backend-api/src/main/resources/application.yml b/services/backend-api/src/main/resources/application.yml new file mode 100644 index 0000000..9f1af3e --- /dev/null +++ 
b/services/backend-api/src/main/resources/application.yml @@ -0,0 +1,41 @@ +spring: + application: + name: kixi-backend-api + + r2dbc: + url: r2dbc:postgresql://${DB_HOST:localhost}:${DB_PORT:5432}/${DB_NAME:kixi_db} + username: ${DB_USERNAME:postgres} + password: ${DB_PASSWORD:postgres} + + flyway: + enabled: false + +server: + port: 8080 + +management: + endpoints: + web: + exposure: + include: health,info + endpoint: + health: + show-details: always + +groq: + api: + key: ${GROQ_API_KEY:} + url: ${GROQ_API_URL:https://api.groq.com/openai/v1} + model: ${GROQ_MODEL:llama-3.3-70b-versatile} + +springdoc: + api-docs: + path: /api-docs + swagger-ui: + path: /swagger-ui.html + +logging: + level: + root: INFO + ao.creativemode.kixi: DEBUG + org.springframework.r2dbc: DEBUG diff --git a/services/backend-api/src/main/resources/db/migration/V1_create_term_table.sql b/services/backend-api/src/main/resources/db/migration/V10__create_term_table.sql similarity index 88% rename from services/backend-api/src/main/resources/db/migration/V1_create_term_table.sql rename to services/backend-api/src/main/resources/db/migration/V10__create_term_table.sql index 9fe94ca..733af90 100644 --- a/services/backend-api/src/main/resources/db/migration/V1_create_term_table.sql +++ b/services/backend-api/src/main/resources/db/migration/V10__create_term_table.sql @@ -4,7 +4,7 @@ CREATE TABLE IF NOT EXISTS terms ( name VARCHAR(255) NOT NULL, created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL, updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL, - deleted_at TIMESTAMP WITH TIME ZONE DEFAULT NULL, + deleted_at TIMESTAMP WITH TIME ZONE DEFAULT NULL ); CREATE INDEX idx_terms_deleted_at ON terms(deleted_at); diff --git a/services/backend-api/src/main/resources/db/migration/V14__create_question_images_table.sql b/services/backend-api/src/main/resources/db/migration/V14__create_question_images_table.sql new file mode 100644 index 0000000..f754e6f --- /dev/null +++ 
b/services/backend-api/src/main/resources/db/migration/V14__create_question_images_table.sql @@ -0,0 +1,20 @@ +-- Create table for storing question-related images +CREATE TABLE question_images ( + id BIGSERIAL PRIMARY KEY, + question_id BIGINT NOT NULL, + image_url TEXT NOT NULL, + caption TEXT, + order_index INTEGER DEFAULT 0, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + deleted_at TIMESTAMP, + + -- Foreign key constraint linking to the questions table + CONSTRAINT fk_question_images_question + FOREIGN KEY (question_id) + REFERENCES questions (id) + ON DELETE CASCADE +); + +-- Index to optimize lookups and filtering by question_id +CREATE INDEX idx_question_images_question_id ON question_images(question_id); \ No newline at end of file diff --git a/services/backend-api/src/main/resources/db/migration/V1__create_simulation_answer5_table.sql b/services/backend-api/src/main/resources/db/migration/V1__create_simulation_answer5_table.sql new file mode 100644 index 0000000..5189660 --- /dev/null +++ b/services/backend-api/src/main/resources/db/migration/V1__create_simulation_answer5_table.sql @@ -0,0 +1,23 @@ +CREATE TABLE simulation_answers( + id BIGSERIAL PRIMARY KEY, + simulation_id BIGINT NOT NULL, + question_id BIGINT NOT NULL, + selected_option_id BIGINT, + answer_text TEXT, + score_obtained REAL NOT NULL DEFAULT 0, + is_correct BOOLEAN, + answered_at TIMESTAMP, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP, + deleted_at TIMESTAMP, + + CONSTRAINT fk_simulation FOREIGN KEY(simulation_id) + REFERENCES simulation(id) + ON DELETE CASCADE, + + CONSTRAINT uq_simulation_question UNIQUE(simulation_id, question_id) +); + +CREATE INDEX idx_simulation_answers_simulation_id ON simulation_answers(simulation_id); +CREATE INDEX idx_simulation_answers_question_id ON simulation_answers(question_id); +CREATE INDEX idx_simulation_answers_deleted_at ON 
simulation_answers(deleted_at); diff --git a/services/backend-api/src/main/resources/db/migration/V2__create_statement_table.sql b/services/backend-api/src/main/resources/db/migration/V2__create_statement_table.sql index 5b93a29..1bf2bfd 100644 --- a/services/backend-api/src/main/resources/db/migration/V2__create_statement_table.sql +++ b/services/backend-api/src/main/resources/db/migration/V2__create_statement_table.sql @@ -15,7 +15,14 @@ CREATE TABLE statement ( visible BOOLEAN DEFAULT false, create_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL, update_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL, - delete_at TIMESTAMP WITH TIME ZONE DEFAULT NULL + delete_at TIMESTAMP WITH TIME ZONE DEFAULT NULL, + + CONSTRAINT fk_statement_school_year FOREIGN KEY (school_year_id) REFERENCES school_years(id), + CONSTRAINT fk_statement_term FOREIGN KEY (term_id) REFERENCES terms(id), + CONSTRAINT fk_statement_subject FOREIGN KEY (subject_id) REFERENCES subjects(id), + CONSTRAINT fk_statement_class FOREIGN KEY (class_id) REFERENCES classes(id), + CONSTRAINT fk_statement_course FOREIGN KEY (course_id) REFERENCES courses(id), + CONSTRAINT fk_statement_created_by FOREIGN KEY (create_by) REFERENCES accounts(id) ); CREATE INDEX idx_statement_active ON statement (delete_at) WHERE delete_at IS NULL; diff --git a/services/backend-api/src/main/resources/db/migration/V4__create_accounts_table.sql b/services/backend-api/src/main/resources/db/migration/V4__create_accounts_table.sql deleted file mode 100644 index 6e2d792..0000000 --- a/services/backend-api/src/main/resources/db/migration/V4__create_accounts_table.sql +++ /dev/null @@ -1,19 +0,0 @@ -CREATE TABLE accounts ( - id BIGSERIAL PRIMARY KEY, - username VARCHAR(100) NOT NULL UNIQUE, - email VARCHAR(255) NOT NULL UNIQUE, - password_hash VARCHAR(255) NOT NULL, - email_verified BOOLEAN DEFAULT FALSE, - active BOOLEAN DEFAULT TRUE, - last_login TIMESTAMP, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - 
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - deleted_at TIMESTAMP, - CONSTRAINT uc_accounts_username UNIQUE (username), - CONSTRAINT uc_accounts_email UNIQUE (email) -); - -CREATE INDEX idx_accounts_username ON accounts(username); -CREATE INDEX idx_accounts_email ON accounts(email); -CREATE INDEX idx_accounts_deleted_at ON accounts(deleted_at); -CREATE INDEX idx_accounts_active ON accounts(active); diff --git a/services/backend-api/src/main/resources/db/migration/V2__create_roles_table.sql b/services/backend-api/src/main/resources/db/migration/V4__create_roles_table.sql similarity index 100% rename from services/backend-api/src/main/resources/db/migration/V2__create_roles_table.sql rename to services/backend-api/src/main/resources/db/migration/V4__create_roles_table.sql diff --git a/services/backend-api/src/main/resources/db/migration/V8__create_questions_table.sql b/services/backend-api/src/main/resources/db/migration/V8__create_questions_table.sql new file mode 100644 index 0000000..aa813ce --- /dev/null +++ b/services/backend-api/src/main/resources/db/migration/V8__create_questions_table.sql @@ -0,0 +1,21 @@ +CREATE TABLE questions ( + id BIGSERIAL PRIMARY KEY, + statement_id BIGINT NOT NULL, + number INTEGER NOT NULL, + text TEXT NOT NULL, + question_type VARCHAR(50) NOT NULL, + max_score DECIMAL(10, 2) NOT NULL, + order_index INTEGER, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP, + deleted_at TIMESTAMP + + -- Ensures a unique sequence of question numbers within a specific statement + -- CONSTRAINT uk_questions_statement_number UNIQUE (statement_id, number) +); + +-- Optimization for foreign key lookups +-- CREATE INDEX idx_questions_statement_id ON questions(statement_id); + +-- Partial index for Soft Delete performance (optimizes retrieval of active records) +CREATE INDEX idx_questions_deleted_at ON questions(deleted_at) WHERE deleted_at IS NULL; \ No newline at end of file diff --git 
a/services/backend-api/src/main/resources/db/migration/V8__create_simulation_table.sql b/services/backend-api/src/main/resources/db/migration/V8__create_simulation_table.sql new file mode 100644 index 0000000..2d7cd15 --- /dev/null +++ b/services/backend-api/src/main/resources/db/migration/V8__create_simulation_table.sql @@ -0,0 +1,24 @@ +CREATE TABLE simulation ( + id BIGSERIAL PRIMARY KEY, + account_id BIGINT NOT NULL, + statement_id BIGINT NOT NULL, + school_year_id BIGINT, + started_at TIMESTAMP WITH TIME ZONE, + finished_at TIMESTAMP WITH TIME ZONE, + time_spent_seconds INTEGER, + final_score DECIMAL(5,2), + status VARCHAR(20) NOT NULL DEFAULT 'IN_PROGRESS', + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP NOT NULL, + updated_at TIMESTAMP WITH TIME ZONE, + deleted_at TIMESTAMP WITH TIME ZONE, + + CONSTRAINT fk_simulation_account FOREIGN KEY (account_id) REFERENCES accounts(id), + CONSTRAINT fk_simulation_statement FOREIGN KEY (statement_id) REFERENCES statement(id), + CONSTRAINT fk_simulation_school_year FOREIGN KEY (school_year_id) REFERENCES school_years(id), + CONSTRAINT chk_simulation_status CHECK (status IN ('IN_PROGRESS', 'FINISHED', 'CANCELLED')) +); + +CREATE INDEX idx_simulation_active ON simulation (deleted_at) WHERE deleted_at IS NULL; +CREATE INDEX idx_simulation_account ON simulation (account_id) WHERE deleted_at IS NULL; +CREATE INDEX idx_simulation_statement ON simulation (statement_id) WHERE deleted_at IS NULL; +CREATE INDEX idx_simulation_status ON simulation (status) WHERE deleted_at IS NULL; \ No newline at end of file diff --git a/services/ocr-service/Dockerfile b/services/ocr-service/Dockerfile new file mode 100644 index 0000000..5d00d92 --- /dev/null +++ b/services/ocr-service/Dockerfile @@ -0,0 +1,124 @@ +# OCR Service Dockerfile +# Multi-stage build for optimized image size + +# ============================================================================= +# Stage 1: Builder +# 
============================================================================= +FROM python:3.11-slim as builder + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + gcc \ + g++ \ + libffi-dev \ + libssl-dev \ + && rm -rf /var/lib/apt/lists/* + +# Create virtual environment +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# Copy requirements and install dependencies +WORKDIR /app +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --upgrade pip setuptools wheel && \ + pip install -r requirements.txt + +# ============================================================================= +# Stage 2: Runtime +# ============================================================================= +FROM python:3.11-slim as runtime + +# Labels +LABEL maintainer="Kixi Team" \ + service="ocr-service" \ + version="1.0.0" \ + description="OCR Service using PaddleOCR-VL for text extraction" + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONFAULTHANDLER=1 \ + PATH="/opt/venv/bin:$PATH" \ + # Application settings + SERVICE_NAME=ocr-service \ + SERVICE_VERSION=1.0.0 \ + ENVIRONMENT=production \ + DEBUG=false \ + HOST=0.0.0.0 \ + PORT=8000 \ + WORKERS=1 \ + # OCR settings + OCR_LANG=pt \ + OCR_USE_GPU=false \ + OCR_USE_ANGLE_CLS=true \ + OCR_SHOW_LOG=false \ + # Processing settings + MAX_IMAGE_SIZE_MB=20.0 \ + MAX_IMAGES_PER_REQUEST=10 \ + MIN_CONFIDENCE_THRESHOLD=0.5 \ + ENABLE_DESKEW=true \ + ENABLE_DENOISE=true \ + TARGET_DPI=300 \ + # Logging + LOG_LEVEL=INFO \ + LOG_FORMAT=json + +# Install runtime dependencies +# Note: libgl1-mesa-glx was renamed to libgl1 in Debian Trixie +RUN apt-get update && apt-get install -y --no-install-recommends \ + # OpenCV dependencies (Debian Trixie compatible) + 
libgl1 \ + libglib2.0-0t64 \ + libsm6 \ + libxext6 \ + libxrender1 \ + libgomp1 \ + # PDF processing dependencies (for pdf2image fallback) + poppler-utils \ + # Fonts for rendering + fonts-liberation \ + fonts-dejavu-core \ + # Curl for health checks + curl \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +# Create non-root user for security +RUN groupadd --gid 1000 ocr && \ + useradd --uid 1000 --gid ocr --shell /bin/bash --create-home ocr + +# Copy virtual environment from builder +COPY --from=builder /opt/venv /opt/venv + +# Set working directory +WORKDIR /app + +# Copy application code +COPY --chown=ocr:ocr . . + +# Create directories for models and cache +RUN mkdir -p /home/ocr/.paddleocr && \ + chown -R ocr:ocr /home/ocr/.paddleocr + +# Switch to non-root user +USER ocr + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD curl -f http://localhost:8000/health || exit 1 + +# Run the application +CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/services/ocr-service/README.md b/services/ocr-service/README.md index 40174fa..6e42b2b 100644 --- a/services/ocr-service/README.md +++ b/services/ocr-service/README.md @@ -1,3 +1,326 @@ -# OCR Service +# Kixi OCR Service -Documentação e instruções para o serviço de OCR. +A high-performance OCR (Optical Character Recognition) microservice built with FastAPI and PaddleOCR-VL for extracting structured text from exam images and PDFs. 
+ +## Overview + +This service is part of the Kixi platform and is responsible for: + +- Extracting text from exam paper images +- Detecting and structuring questions, options, and metadata +- Processing multi-page PDF documents +- Providing confidence scores for all extracted data +- Supporting Portuguese and other languages + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ OCR Service │ +├─────────────────────────────────────────────────────────────┤ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │ +│ │ FastAPI │──│ OCR Engine │──│ PaddleOCR-VL │ │ +│ │ Routes │ │ (engine.py)│ │ (PP-OCRv4 models) │ │ +│ └─────────────┘ └─────────────┘ └─────────────────────┘ │ +│ │ │ │ +│ │ ┌──────┴──────┐ │ +│ │ │ │ │ +│ ┌──────┴──────┐ │ ┌──────────┴───────┐ │ +│ │ PDF Handler │ │ │ Image Preprocessor│ │ +│ │ │ │ │ (OpenCV) │ │ +│ └─────────────┘ │ └──────────────────┘ │ +│ │ │ +│ ┌──────┴──────┐ │ +│ │Postprocessor│ │ +│ │ (Regex + ML)│ │ +│ └─────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Features + +- **PaddleOCR-VL Integration**: Uses PP-OCRv4 models for high accuracy +- **Multi-language Support**: Optimized for Portuguese, with support for 80+ languages +- **PDF Processing**: Extract text from multi-page PDF documents +- **Image Preprocessing**: Automatic deskewing, noise reduction, and contrast enhancement +- **Structured Output**: Extracts metadata, questions, options with confidence scores +- **Question Type Detection**: Identifies multiple choice, short answer, development, and true/false questions +- **RESTful API**: Clean FastAPI-based HTTP interface +- **Health Checks**: Built-in health check endpoints for container orchestration +- **Prometheus Metrics**: Optional metrics endpoint for monitoring + +## API Endpoints + +### Core Endpoints + +| Method | Endpoint | Description | +|--------|----------|-------------| +| POST | `/ocr/v1/extract` | Extract text from one or 
more images/PDFs | +| POST | `/ocr/v1/extract/simple` | Simple single-image extraction | +| GET | `/ocr/health` | Detailed health check | +| GET | `/health` | Simple health check | +| GET | `/ocr/v1/supported-languages` | List supported OCR languages | + +### Request Format + +**POST /ocr/v1/extract** + +```bash +curl -X POST http://localhost:8000/ocr/v1/extract \ + -H "Content-Type: multipart/form-data" \ + -F "images[]=@exam_page1.jpg" \ + -F "images[]=@exam_page2.jpg" \ + -F 'context={"languageHint": "pt"}' +``` + +### Response Format + +```json +{ + "status": "success", + "requestId": "req-abc123def456", + "processingTimeMs": 4870, + "overallConfidence": 0.892, + "document": { + "pageCount": 2, + "mainLanguage": "pt", + "hasTables": true + }, + "metadata": { + "schoolYear": { "value": "2024/2025", "confidence": 0.97 }, + "term": { "value": "2º Trimestre", "confidence": 0.94 }, + "subject": { "value": "Matemática", "confidence": 0.93 }, + "examType": { "value": "Avaliação Periódica", "confidence": 0.91 }, + "durationMinutes": { "value": 120, "confidence": 0.88 }, + "variant": { "value": "A", "confidence": 0.96 } + }, + "questions": [ + { + "number": 1, + "confidence": 0.935, + "text": { "value": "Resolva a equação: 3x - 7 = 14", "confidence": 0.96 }, + "questionType": { "value": "short_answer", "confidence": 0.89 }, + "maxScore": { "value": 5, "confidence": 0.92 }, + "options": [], + "pageIndex": 0 + }, + { + "number": 2, + "confidence": 0.918, + "text": { "value": "Qual das opções representa a raiz quadrada de 64?", "confidence": 0.95 }, + "questionType": { "value": "multiple_choice", "confidence": 0.94 }, + "options": [ + { "optionLabel": "A", "optionText": "6", "confidence": 0.97 }, + { "optionLabel": "B", "optionText": "8", "confidence": 0.96 }, + { "optionLabel": "C", "optionText": "7", "confidence": 0.94 }, + { "optionLabel": "D", "optionText": "9", "confidence": 0.95 } + ], + "pageIndex": 0 + } + ], + "warnings": [ + { "code": "LOW_CONFIDENCE", "field": 
"class", "confidence": 0.76 } + ] +} +``` + +## Quick Start + +### Prerequisites + +- Python 3.11+ +- Docker (optional, recommended) + +### Local Development + +1. **Create virtual environment:** + +```bash +cd services/ocr-service +python -m venv venv +source venv/bin/activate # Linux/macOS +# or: venv\Scripts\activate # Windows +``` + +2. **Install dependencies:** + +```bash +pip install -r requirements.txt +``` + +3. **Configure environment:** + +```bash +cp env.example .env +# Edit .env as needed +``` + +4. **Run the service:** + +```bash +python -m app.main +# or +uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 +``` + +5. **Access API documentation:** + +Open http://localhost:8000/docs (available in debug mode) + +### Docker + +**Build and run:** + +```bash +# Build image +docker build -t kixi-ocr-service . + +# Run container +docker run -d \ + --name ocr-service \ + -p 8000:8000 \ + -e DEBUG=true \ + -e OCR_LANG=pt \ + kixi-ocr-service +``` + +**Using docker-compose (from project root):** + +```bash +docker-compose up --build ocr-service +``` + +## Configuration + +All configuration is done through environment variables. See `env.example` for the complete list. 
+ +### Key Configuration Options + +| Variable | Default | Description | +|----------|---------|-------------| +| `PORT` | 8000 | Server port | +| `OCR_LANG` | pt | Primary OCR language | +| `OCR_USE_GPU` | false | Enable GPU acceleration | +| `OCR_USE_ANGLE_CLS` | true | Detect rotated text | +| `MAX_IMAGE_SIZE_MB` | 20.0 | Maximum upload size | +| `MAX_IMAGES_PER_REQUEST` | 10 | Maximum images per request | +| `MIN_CONFIDENCE_THRESHOLD` | 0.5 | Minimum confidence threshold | +| `ENABLE_DESKEW` | true | Auto-correct image rotation | +| `ENABLE_DENOISE` | true | Apply noise reduction | +| `DEBUG` | false | Enable debug mode | + +## Project Structure + +``` +ocr-service/ +├── app/ +│ ├── __init__.py +│ ├── main.py # FastAPI application entry point +│ ├── api/ +│ │ ├── __init__.py +│ │ ├── routes.py # API endpoints +│ │ └── pdf_handler.py # PDF processing utilities +│ ├── config/ +│ │ ├── __init__.py +│ │ └── settings.py # Configuration management +│ └── ocr/ +│ ├── __init__.py +│ ├── engine.py # PaddleOCR engine wrapper +│ ├── preprocessing.py # Image preprocessing +│ └── postprocessing.py# OCR result parsing +├── tests/ +│ ├── fixtures/ # Test images +│ └── test_engine.py # Unit tests +├── Dockerfile +├── requirements.txt +├── env.example +└── README.md +``` + +## Testing + +### Run Tests + +```bash +# Run all tests +pytest + +# Run with coverage +pytest --cov=app --cov-report=html + +# Run specific test file +pytest tests/test_engine.py -v +``` + +### Manual Testing + +```bash +# Test with a sample image +curl -X POST http://localhost:8000/ocr/v1/extract/simple \ + -F "image=@tests/fixtures/sample_exam.jpg" +``` + +## Performance Considerations + +### Optimization Tips + +1. **GPU Acceleration**: Set `OCR_USE_GPU=true` for 5-10x faster processing (requires CUDA) +2. **Pre-warming**: The service pre-loads models on startup in production mode +3. **Image Size**: Optimal input resolution is 300 DPI +4. 
**Batch Processing**: Use the multi-image endpoint for multi-page documents + +### Resource Requirements + +| Mode | RAM | CPU | GPU (optional) | +|------|-----|-----|----------------| +| Minimum | 2GB | 2 cores | - | +| Recommended | 4GB | 4 cores | NVIDIA (CUDA 11+) | +| Production | 8GB+ | 4+ cores | NVIDIA T4 or better | + +## Integration with Backend API + +The OCR service is called by the Spring Boot backend API: + +```java +// Example WebClient call from backend-api +WebClient webClient = WebClient.create("http://ocr-service:8000"); + +Mono response = webClient.post() + .uri("/ocr/v1/extract") + .contentType(MediaType.MULTIPART_FORM_DATA) + .body(BodyInserters.fromMultipartData("images", imageResource)) + .retrieve() + .bodyToMono(OCRResponse.class); +``` + +## Troubleshooting + +### Common Issues + +**Model Download Slow/Failing:** +```bash +# Pre-download models manually +python -c "from paddleocr import PaddleOCR; PaddleOCR(lang='pt')" +``` + +**Out of Memory:** +- Reduce `MAX_IMAGE_SIZE_MB` +- Process fewer images per request +- Consider GPU acceleration + +**Low Accuracy:** +- Ensure input images are at least 300 DPI +- Enable preprocessing (`ENABLE_DESKEW=true`, `ENABLE_DENOISE=true`) +- Check if the correct language is configured + +**PDF Processing Fails:** +- Install poppler-utils: `apt-get install poppler-utils` +- Ensure PyMuPDF is installed: `pip install PyMuPDF` + +## Contributing + +See the main project's [CONTRIBUTING.md](../../CONTRIBUTING.md) for guidelines. + +## License + +This service is part of the Kixi platform and is licensed under Apache License 2.0 with Commons Clause. See [LICENSE](../../LICENSE) for details. 
\ No newline at end of file diff --git a/services/ocr-service/app/__init__.py b/services/ocr-service/app/__init__.py new file mode 100644 index 0000000..fc1078b --- /dev/null +++ b/services/ocr-service/app/__init__.py @@ -0,0 +1 @@ +# OCR Service App diff --git a/services/ocr-service/app/api/__init__.py b/services/ocr-service/app/api/__init__.py new file mode 100644 index 0000000..9c7f58e --- /dev/null +++ b/services/ocr-service/app/api/__init__.py @@ -0,0 +1 @@ +# API module diff --git a/services/ocr-service/app/api/pdf_handler.py b/services/ocr-service/app/api/pdf_handler.py new file mode 100644 index 0000000..0dc1339 --- /dev/null +++ b/services/ocr-service/app/api/pdf_handler.py @@ -0,0 +1,289 @@ +""" +PDF Handler Module + +Utilities for extracting images from PDF files for OCR processing. +Supports multi-page PDFs and various PDF rendering qualities. +""" + +import io +from typing import List, Optional + +import numpy as np +from PIL import Image + +import structlog + +logger = structlog.get_logger(__name__) + + +def is_pdf(content: bytes) -> bool: + """ + Check if content is a PDF file based on magic bytes. + + Args: + content: File content as bytes + + Returns: + True if content is a PDF file + """ + return content[:4] == b'%PDF' + + +def extract_images_from_pdf( + pdf_content: bytes, + dpi: int = 300, + first_page: Optional[int] = None, + last_page: Optional[int] = None, +) -> List[np.ndarray]: + """ + Extract images from PDF pages. + + Uses PyMuPDF (fitz) for PDF rendering. Falls back to pdf2image + if PyMuPDF is not available. 
+ + Args: + pdf_content: PDF file content as bytes + dpi: Resolution for rendering (default 300 DPI) + first_page: First page to extract (1-indexed, optional) + last_page: Last page to extract (1-indexed, optional) + + Returns: + List of images as numpy arrays (BGR format for OpenCV) + + Raises: + ValueError: If the PDF cannot be processed + """ + images = [] + + try: + # Try PyMuPDF first (faster and more reliable) + images = _extract_with_pymupdf(pdf_content, dpi, first_page, last_page) + except ImportError: + logger.warning("PyMuPDF not available, falling back to pdf2image") + try: + images = _extract_with_pdf2image(pdf_content, dpi, first_page, last_page) + except ImportError: + raise ValueError( + "No PDF processing library available. " + "Install either PyMuPDF (fitz) or pdf2image with poppler." + ) + except Exception as e: + logger.error("PDF extraction failed", error=str(e)) + raise ValueError(f"Failed to extract images from PDF: {e}") + + if not images: + raise ValueError("No pages could be extracted from PDF") + + logger.info( + "PDF extraction complete", + num_pages=len(images), + dpi=dpi, + ) + + return images + + +def _extract_with_pymupdf( + pdf_content: bytes, + dpi: int = 300, + first_page: Optional[int] = None, + last_page: Optional[int] = None, +) -> List[np.ndarray]: + """ + Extract images using PyMuPDF (fitz). 
+ + Args: + pdf_content: PDF file content as bytes + dpi: Resolution for rendering + first_page: First page to extract (1-indexed) + last_page: Last page to extract (1-indexed) + + Returns: + List of images as numpy arrays (BGR format) + """ + import fitz # PyMuPDF + + images = [] + + # Open PDF from bytes + pdf_document = fitz.open(stream=pdf_content, filetype="pdf") + + try: + # Calculate page range + total_pages = len(pdf_document) + start_page = (first_page - 1) if first_page else 0 + end_page = last_page if last_page else total_pages + + # Ensure valid range + start_page = max(0, min(start_page, total_pages - 1)) + end_page = max(1, min(end_page, total_pages)) + + # Calculate zoom factor for desired DPI + # Default PDF resolution is 72 DPI + zoom = dpi / 72.0 + matrix = fitz.Matrix(zoom, zoom) + + for page_num in range(start_page, end_page): + page = pdf_document[page_num] + + # Render page to pixmap + pixmap = page.get_pixmap(matrix=matrix, alpha=False) + + # Convert to PIL Image + img_data = pixmap.tobytes("ppm") + pil_image = Image.open(io.BytesIO(img_data)) + + # Convert to numpy array (RGB) + img_array = np.array(pil_image) + + # Convert RGB to BGR for OpenCV compatibility + if len(img_array.shape) == 3 and img_array.shape[2] == 3: + img_array = img_array[:, :, ::-1].copy() + + images.append(img_array) + + logger.debug( + "Extracted PDF page", + page_num=page_num + 1, + size=f"{pixmap.width}x{pixmap.height}", + ) + + finally: + pdf_document.close() + + return images + + +def _extract_with_pdf2image( + pdf_content: bytes, + dpi: int = 300, + first_page: Optional[int] = None, + last_page: Optional[int] = None, +) -> List[np.ndarray]: + """ + Extract images using pdf2image (requires poppler). 
+
+    Args:
+        pdf_content: PDF file content as bytes
+        dpi: Resolution for rendering
+        first_page: First page to extract (1-indexed)
+        last_page: Last page to extract (1-indexed)
+
+    Returns:
+        List of images as numpy arrays (BGR format)
+    """
+    from pdf2image import convert_from_bytes
+
+    images = []
+
+    # Convert PDF to images
+    pil_images = convert_from_bytes(
+        pdf_content,
+        dpi=dpi,
+        first_page=first_page,
+        last_page=last_page,
+        fmt="ppm",  # NOTE: fmt is an output image format; "RGB" is invalid and silently fell back to ppm
+    )
+
+    for idx, pil_image in enumerate(pil_images):
+        # Convert to numpy array (RGB)
+        img_array = np.array(pil_image)
+
+        # Convert RGB to BGR for OpenCV compatibility
+        if len(img_array.shape) == 3 and img_array.shape[2] == 3:
+            img_array = img_array[:, :, ::-1].copy()
+
+        images.append(img_array)
+
+        logger.debug(
+            "Extracted PDF page",
+            page_num=idx + 1,
+            size=f"{pil_image.width}x{pil_image.height}",
+        )
+
+    return images
+
+
+def get_pdf_info(pdf_content: bytes) -> dict:
+    """
+    Get information about a PDF file.
+
+    Args:
+        pdf_content: PDF file content as bytes
+
+    Returns:
+        Dictionary with PDF metadata
+    """
+    info = {
+        "page_count": 0,
+        "title": None,
+        "author": None,
+        "subject": None,
+        "creator": None,
+        "encrypted": False,
+    }
+
+    try:
+        import fitz
+
+        pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
+
+        try:
+            info["page_count"] = len(pdf_document)
+            info["encrypted"] = pdf_document.is_encrypted
+
+            # Get metadata
+            metadata = pdf_document.metadata
+            if metadata:
+                info["title"] = metadata.get("title")
+                info["author"] = metadata.get("author")
+                info["subject"] = metadata.get("subject")
+                info["creator"] = metadata.get("creator")
+
+        finally:
+            pdf_document.close()
+
+    except ImportError:
+        # Fallback: just check if it's a valid PDF
+        if is_pdf(pdf_content):
+            info["page_count"] = -1  # Unknown
+        else:
+            raise ValueError("Invalid PDF file")
+
+    except Exception as e:
+        logger.error("Failed to get PDF info", error=str(e))
+        raise ValueError(f"Failed to read PDF: {e}")
+
+    return info
+
+
+def validate_pdf(pdf_content: bytes, max_pages: int = 50) -> None: + """ + Validate a PDF file for OCR processing. + + Args: + pdf_content: PDF file content as bytes + max_pages: Maximum allowed pages + + Raises: + ValueError: If the PDF is invalid or exceeds limits + """ + if not is_pdf(pdf_content): + raise ValueError("File is not a valid PDF") + + try: + info = get_pdf_info(pdf_content) + + if info["encrypted"]: + raise ValueError("Encrypted PDFs are not supported") + + if info["page_count"] > max_pages: + raise ValueError( + f"PDF has {info['page_count']} pages, " + f"maximum allowed is {max_pages}" + ) + + except ValueError: + raise + except Exception as e: + raise ValueError(f"Failed to validate PDF: {e}") diff --git a/services/ocr-service/app/api/routes.py b/services/ocr-service/app/api/routes.py index 610bfcc..01ea69b 100644 --- a/services/ocr-service/app/api/routes.py +++ b/services/ocr-service/app/api/routes.py @@ -1 +1,77 @@ # FastAPI endpoints for OCR service +from fastapi import APIRouter, UploadFile, File, HTTPException +from fastapi.responses import JSONResponse +from PIL import Image +import io +import pytesseract + +from app.ocr.engine import OCREngine +from app.ocr.preprocessing import preprocess_image + +router = APIRouter(tags=["OCR"]) +ocr_engine = OCREngine() + + +@router.post("/ocr") +async def extract_text(file: UploadFile = File(...)): + """ + Extract text from an uploaded image using OCR. + + Supported formats: PNG, JPG, JPEG, TIFF, BMP + """ + # Validate file type + allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/tiff", "image/bmp"] + if file.content_type not in allowed_types: + raise HTTPException( + status_code=400, + detail=f"Invalid file type. 
Allowed types: {', '.join(allowed_types)}" + ) + + try: + # Read image + contents = await file.read() + image = Image.open(io.BytesIO(contents)) + + # Preprocess and extract text + processed_image = preprocess_image(image) + result = ocr_engine.extract_text(processed_image) + + return JSONResponse(content={ + "success": True, + "filename": file.filename, + "text": result["text"], + "confidence": result.get("confidence", None) + }) + + except Exception as e: + raise HTTPException(status_code=500, detail=f"OCR processing failed: {str(e)}") + + +@router.post("/ocr/batch") +async def extract_text_batch(files: list[UploadFile] = File(...)): + """ + Extract text from multiple images. + """ + results = [] + + for file in files: + try: + contents = await file.read() + image = Image.open(io.BytesIO(contents)) + processed_image = preprocess_image(image) + result = ocr_engine.extract_text(processed_image) + + results.append({ + "filename": file.filename, + "success": True, + "text": result["text"], + "confidence": result.get("confidence", None) + }) + except Exception as e: + results.append({ + "filename": file.filename, + "success": False, + "error": str(e) + }) + + return JSONResponse(content={"results": results}) diff --git a/services/ocr-service/app/config/__init__.py b/services/ocr-service/app/config/__init__.py new file mode 100644 index 0000000..a38cc87 --- /dev/null +++ b/services/ocr-service/app/config/__init__.py @@ -0,0 +1 @@ +# Config module diff --git a/services/ocr-service/app/config/settings.py b/services/ocr-service/app/config/settings.py index 74e0e55..6ee1e2a 100644 --- a/services/ocr-service/app/config/settings.py +++ b/services/ocr-service/app/config/settings.py @@ -1 +1,27 @@ # Settings and configuration for OCR service +import os +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + """Application settings loaded from environment variables.""" + + # App settings + app_name: str = "Kixi OCR Service" + app_version: str = "1.0.0" 
+ debug: bool = os.getenv("DEBUG", "false").lower() == "true" + + # OCR settings + ocr_language: str = os.getenv("OCR_LANGUAGE", "por+eng") + max_file_size_mb: int = int(os.getenv("MAX_FILE_SIZE_MB", "10")) + + # Server settings + host: str = os.getenv("HOST", "0.0.0.0") + port: int = int(os.getenv("PORT", "8000")) + + class Config: + env_file = ".env" + case_sensitive = False + + +settings = Settings() diff --git a/services/ocr-service/app/main.py b/services/ocr-service/app/main.py index f2bcd9f..f1eb2e9 100644 --- a/services/ocr-service/app/main.py +++ b/services/ocr-service/app/main.py @@ -1 +1,35 @@ # Entry point for OCR service +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from app.api.routes import router + +app = FastAPI( + title="Kixi OCR Service", + description="Serviço de OCR para extração de texto de imagens", + version="1.0.0" +) + +# CORS configuration +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Include routes +app.include_router(router, prefix="/api/v1") + + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return {"status": "healthy", "service": "kixi-ocr"} + + +@app.get("/") +async def root(): + """Root endpoint""" + return {"message": "Kixi OCR Service", "version": "1.0.0"} diff --git a/services/ocr-service/app/ocr/__init__.py b/services/ocr-service/app/ocr/__init__.py new file mode 100644 index 0000000..531b055 --- /dev/null +++ b/services/ocr-service/app/ocr/__init__.py @@ -0,0 +1 @@ +# OCR module diff --git a/services/ocr-service/app/ocr/engine.py b/services/ocr-service/app/ocr/engine.py index 8f2d042..981ffcb 100644 --- a/services/ocr-service/app/ocr/engine.py +++ b/services/ocr-service/app/ocr/engine.py @@ -1 +1,94 @@ # PaddleOCR-VL engine implementation +import pytesseract +from PIL import Image +from typing import Optional + + +class OCREngine: + """ + OCR Engine using Tesseract 
for text extraction.
+    """
+
+    def __init__(self, lang: str = "por+eng"):
+        """
+        Initialize OCR Engine.
+
+        Args:
+            lang: Language(s) to use for OCR. Default is Portuguese + English.
+        """
+        self.lang = lang
+        self.config = "--oem 3 --psm 6"  # LSTM engine, uniform text block
+
+    def extract_text(self, image: Image.Image) -> dict:
+        """
+        Extract text from a PIL Image.
+
+        Args:
+            image: PIL Image object
+
+        Returns:
+            Dictionary with extracted text and confidence
+        """
+        try:
+            # Get text with confidence data
+            data = pytesseract.image_to_data(
+                image,
+                lang=self.lang,
+                config=self.config,
+                output_type=pytesseract.Output.DICT
+            )
+
+            # Calculate average confidence (conf is -1 for non-word boxes)
+            confidences = [float(c) for c in data['conf'] if float(c) > 0]  # conf entries are float strings in pytesseract >= 0.3.8; int() would raise
+            avg_confidence = sum(confidences) / len(confidences) if confidences else 0
+
+            # Get full text
+            text = pytesseract.image_to_string(
+                image,
+                lang=self.lang,
+                config=self.config
+            )
+
+            return {
+                "text": text.strip(),
+                "confidence": round(avg_confidence, 2),
+                "word_count": len([w for w in data['text'] if w.strip()])
+            }
+
+        except Exception as e:
+            raise RuntimeError(f"OCR extraction failed: {str(e)}")
+
+    def extract_text_with_boxes(self, image: Image.Image) -> dict:
+        """
+        Extract text with bounding box coordinates.
+ + Args: + image: PIL Image object + + Returns: + Dictionary with text blocks and their coordinates + """ + try: + data = pytesseract.image_to_data( + image, + lang=self.lang, + config=self.config, + output_type=pytesseract.Output.DICT + ) + + blocks = [] + for i, text in enumerate(data['text']): + if text.strip(): + blocks.append({ + "text": text, + "x": data['left'][i], + "y": data['top'][i], + "width": data['width'][i], + "height": data['height'][i], + "confidence": data['conf'][i] + }) + + return {"blocks": blocks} + + except Exception as e: + raise RuntimeError(f"OCR extraction with boxes failed: {str(e)}") diff --git a/services/ocr-service/app/ocr/postprocessing.py b/services/ocr-service/app/ocr/postprocessing.py index 12133d5..e75a867 100644 --- a/services/ocr-service/app/ocr/postprocessing.py +++ b/services/ocr-service/app/ocr/postprocessing.py @@ -1 +1,1602 @@ -# Basic normalization post-OCR +""" +OCR Postprocessing Module + +Provides utilities for processing and structuring raw OCR output: +- Text normalization and cleaning +- Question detection and segmentation +- Metadata extraction (school year, term, subject, etc.) +- Question type inference (dissertativa, multipla_escolha) +- Option extraction for multiple choice questions +- Image detection and region identification +- Confidence score aggregation + +Optimized for Angolan exam papers (12ª classe). 
+""" + +import re +from dataclasses import dataclass, field +from typing import Optional, List, Dict, Any, Tuple +from enum import Enum + + +class QuestionType(str, Enum): + """Supported question types.""" + DISSERTATIVA = "dissertativa" + MULTIPLA_ESCOLHA = "multipla_escolha" + UNKNOWN = "unknown" + + +@dataclass +class TextBlock: + """Represents a block of text with position and confidence.""" + text: str + confidence: float + bbox: Tuple[int, int, int, int] # x1, y1, x2, y2 + page_index: int = 0 + line_index: int = 0 + + +@dataclass +class ExtractedOption: + """Represents an extracted question option.""" + option_label: str + option_text: str + confidence: float + + +@dataclass +class ImageToUpload: + """Represents an image region to be uploaded.""" + suggested_filename: str + description: str + region: str # questao_1, cabecalho, rodape, etc. + bbox: Optional[Tuple[int, int, int, int]] = None + page_index: int = 0 + + +@dataclass +class SubitemContent: + """Represents a subitem (alínea) with its label and content.""" + label: str # "a)", "b)", etc. + text: str # The content of the subitem + cotacao: Optional[float] = None + + def to_dict(self) -> Dict[str, Any]: + return { + "label": self.label, + "text": self.text, + "cotacao": self.cotacao, + } + + +@dataclass +class ExtractedQuestion: + """Represents an extracted question with all its components.""" + number: str # Can be "1", "2a", "3-b)", etc. 
+ text: str + text_confidence: float + question_type: QuestionType + question_type_confidence: float + subitems: List[str] = field(default_factory=list) # ["a)", "b)", "c)"] - labels only + subitems_content: List[SubitemContent] = field(default_factory=list) # Full subitem with content + cotacao: Optional[float] = None + cotacao_confidence: float = 0.0 + options: Optional[List[ExtractedOption]] = None + has_image: bool = False + image_description: Optional[str] = None + page_index: int = 0 + start_y: int = 0 + end_y: int = 0 + + @property + def confidence(self) -> float: + """Calculate overall question confidence.""" + confidences = [self.text_confidence, self.question_type_confidence] + if self.options: + confidences.extend(opt.confidence for opt in self.options) + if self.cotacao is not None: + confidences.append(self.cotacao_confidence) + return sum(confidences) / len(confidences) if confidences else 0.0 + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + result = { + "number": self.number, + "confidence": round(self.confidence, 3), + "subitems": self.subitems, + "subitemsContent": [s.to_dict() for s in self.subitems_content] if self.subitems_content else [], + "text": {"value": self.text, "confidence": round(self.text_confidence, 3)}, + "type": self.question_type.value, + "cotacao": self.cotacao, + "hasImage": self.has_image, + "pageIndex": self.page_index, + "startY": self.start_y, + "endY": self.end_y, + } + + if self.image_description: + result["imageDescription"] = self.image_description + + if self.options: + result["options"] = [ + { + "optionLabel": opt.option_label, + "optionText": opt.option_text, + "confidence": round(opt.confidence, 3), + } + for opt in self.options + ] + else: + result["options"] = None + + return result + + +@dataclass +class MetadataField: + """Represents an extracted metadata field with confidence.""" + value: Optional[Any] + confidence: float + + def to_dict(self) -> Dict[str, Any]: + 
"""Convert to dictionary for JSON serialization.""" + return {"value": self.value, "confidence": round(self.confidence, 3)} + + +@dataclass +class ExtractedMetadata: + """Represents extracted document metadata - Angolan exam format.""" + exam_type: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + duration_minutes: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + variant: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + title: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + instructions: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + school_year_start: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + school_year_end: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + class_grade: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + course_name: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + subject_name: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + total_max_score: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + # Legacy fields for compatibility + school_year: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + term: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + subject: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + course: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + class_info: MetadataField = field(default_factory=lambda: MetadataField(None, 0.0)) + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return { + # New structured fields + "examType": self.exam_type.to_dict(), + "durationMinutes": self.duration_minutes.to_dict(), + "variant": self.variant.to_dict(), + "title": self.title.to_dict(), + "instructions": self.instructions.to_dict(), 
+ "schoolYearStart": self.school_year_start.to_dict(), + "schoolYearEnd": self.school_year_end.to_dict(), + "classGrade": self.class_grade.to_dict(), + "courseName": self.course_name.to_dict(), + "subjectName": self.subject_name.to_dict(), + "totalMaxScore": self.total_max_score.to_dict(), + # Legacy fields + "schoolYear": self.school_year.to_dict(), + "term": self.term.to_dict(), + "subject": self.subject.to_dict(), + "course": self.course.to_dict(), + "class": self.class_info.to_dict(), + } + + +@dataclass +class UnmappedContent: + """Represents content that couldn't be mapped to questions or metadata.""" + page_index: int + text: str + confidence: float + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return { + "pageIndex": self.page_index, + "text": self.text, + "confidence": round(self.confidence, 3), + } + + +@dataclass +class Warning: + """Represents a processing warning.""" + code: str + field: str + confidence: float + message: Optional[str] = None + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + result = { + "code": self.code, + "field": self.field, + "confidence": round(self.confidence, 3), + } + if self.message: + result["message"] = self.message + return result + + +class OCRPostprocessor: + """ + Postprocessor for OCR results - Optimized for Angolan exams. + + Transforms raw OCR text blocks into structured data including: + - Document metadata (school year, subject, exam type, etc.) 
+ - Questions with their types and options + - Image regions for upload + - Confidence scores for all extracted values + """ + + # Regex patterns for metadata extraction + PATTERNS = { + # School year patterns (e.g., "2024/2025", "Ano Letivo 2024-2025", "Ano Lectivo: 2024/2025") + "school_year": [ + # Explicit "Ano Letivo" patterns + r"ano\s*let[ií]vo\s*[:\s]*(\d{4})\s*[/-]\s*(\d{2,4})", + r"ano\s*lect[ií]vo\s*[:\s]*(\d{4})\s*[/-]\s*(\d{2,4})", + r"ano\s*lectivo\s*[:\s]*(\d{4})\s*[/-]\s*(\d{2,4})", + # Inline patterns like "12ª Classe Ano Lectivo: 2024/2025" + r"classe\s*ano\s*lect[ií]vo\s*[:\s]*(\d{4})\s*[/-]\s*(\d{2,4})", + # Standalone year patterns (less specific, use last) + r"(\d{4})\s*[/-]\s*(\d{4})", + r"(\d{4})\s*/\s*(\d{2,4})", + # Pattern with colon: "2024/2025" + r"[:\s](\d{4})\s*/\s*(\d{2,4})", + ], + # Term patterns (e.g., "1º Trimestre") + "term": [ + r"(\d)[ºª°]?\s*(?:trimestre|term|período|bimestre)", + r"(?:trimestre|term|período|bimestre)\s*(\d)", + r"(I{1,3}|IV)\s*(?:trimestre|term|período)", + ], + # Subject patterns - Angolan format (improved for "PROVA DE EXAME DE MATEMÁTICA") + "subject": [ + # Most specific: "PROVA DE EXAME DE MATEMÁTICA" + r"prova\s+de\s+exame\s+de\s+([A-Za-záàâãéèêíïóôõöúçñÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑ]+)", + # "PROVA DE RECURSO DE MATEMÁTICA" + r"prova\s+de\s+recurso\s+de\s+([A-Za-záàâãéèêíïóôõöúçñÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑ]+)", + # "EXAME DE MATEMÁTICA" + r"exame\s+de\s+([A-Za-záàâãéèêíïóôõöúçñÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑ]+)", + # "PROVA DE MATEMÁTICA" + r"prova\s+de\s+([A-Za-záàâãéèêíïóôõöúçñÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑ]+)", + # Generic patterns + r"(?:recurso\s+de\s+)([A-Za-záàâãéèêíïóôõöúçñÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑ]+)", + r"(?:disciplina|subject|matéria|cadeira)[:\s]+([A-Za-záàâãéèêíïóôõöúçñÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑ\s\-]+)", + ], + # Duration patterns (e.g., "90 Min", "Duração: 90 minutos") + "duration": [ + r"(?:duração|duration|tempo)[:\s]*(\d+)\s*(?:min(?:utos?)?|minutes?)", + r"(\d+)\s*(?:min(?:utos?)?)\b", + ], + # Variant/Series patterns (e.g., "Série: B", "Variante A", 
"Serie: B") + "variant": [ + r"s[ée]rie\s*[:\s]*([A-Za-z])\b", + r"(?:versão|variant|variante)\s*[:\s]*([A-Za-z])\b", + r"(?:série|serie)\s*([A-Za-z])\b", + ], + # Class/Grade patterns (e.g., "12ª Classe", "10º Ano", "12a Classe") + # More flexible patterns to catch various OCR outputs + "class": [ + r"(\d{1,2})\s*[ºª°aᵃ]?\s*classe", + r"(\d{1,2})\s*[ºª°oᵒ]?\s*ano", + r"(\d{1,2})\s*classe", + r"classe\s*[:\s]*(\d{1,2})", + r"(\d{1,2})\s*[ºª]\s*cl", + # Pattern for inline text like "12a Classe Ano Lectivo" + r"(\d{1,2})[aª]\s*classe", + ], + # Course patterns (e.g., "CURSO: TODOS", "Curso: Ciências") + # More restrictive to avoid capturing garbage - must have CURSO: prefix + "course": [ + r"curso\s*:\s*([A-Z]+)(?:\s|$|[^A-Za-z])", + r"curso\s*:\s+([A-Za-z]+)(?:\s|$)", + ], + # Footer/garbage patterns to filter out + "footer_garbage": [ + r"COORDENA[CÇ][AÃ]O", + r"minttics\.gov\.ao", + r"LUANDA[\s-]*ANGOLA", + r"ANGOLA", + r"gov\.ao", + r"\d+/E\d+/\d+", + r"kaixa", + r"klvs", + r"GOIK", + ], + # Inline cotação patterns (e.g., "(3V)", "(2,5V)", "(4 valores)") + "inline_cotacao": [ + r"\((\d+(?:[.,]\d+)?)\s*[Vv]\)", + r"\((\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?)\)", + r"\[(\d+(?:[.,]\d+)?)\s*[Vv]\]", + ], + # Exam type patterns (improved for recurso, época, etc.) 
+ "exam_type": [ + # Most specific patterns first + r"(prova\s+de\s+exame\s+de\s+\w+)", + r"(prova\s+de\s+recurso\s+de\s+\w+)", + r"(prova\s+de\s+recurso)", + r"(prova\s+de\s+exame)", + r"(exame\s+de\s+recurso)", + r"(exame\s+de\s+época)", + r"(avaliação\s*(?:periódica|sumativa|formativa|diagnóstica|final|contínua))", + r"(prova\s*(?:escrita|oral|prática|final|parcial|de\s+recurso))", + r"(exame\s*(?:final|nacional|regional|provincial|de\s+época)?)", + r"(teste\s*(?:escrito|sumativo|formativo)?)", + r"(recurso)", + ], + # Question number patterns - More flexible for Angolan format + "question_number": [ + r"^(\d+)\s*[-.):]", + r"^(?:questão|pergunta|question|exercício|problema|item)\s*n?[ºª°]?\s*(\d+)", + ], + # Subitem patterns (e.g., "a)", "b.", "(a)") + "subitem": [ + r"^\(?([a-z])\)?[.):]\s*", + ], + # Option patterns for multiple choice + "option": [ + r"^\(?([A-Da-d])\)?[.):]\s*(.+)", + r"^([A-Da-d])\s*[-–—]\s*(.+)", + ], + # Score/Cotação patterns - Angolan format + "cotacao": [ + r"(\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?|marks?|points?)", + r"[(\[]\s*(\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?|marks?|points?)?\s*[)\]]", + ], + # Cotação block pattern (e.g., "Cotação 1-a) 3 valores 2-) 4 valores") + # Updated to be more flexible and capture multi-line cotação blocks + "cotacao_block": [ + r"(?:cotação|cotacao|pontuação|pontuacao)[:\s]*(.+?)(?:\.|$|(?=\n\n))", + r"(?:cotação|cotacao|pontuação|pontuacao)[:\s]*(.+)", + ], + # Individual cotação item patterns for parsing (more comprehensive) + "cotacao_item": [ + # Pattern: "1-a) 3 valores" or "1-a) 3,5 valores" or "1a) 3 valores" + r"(\d+)\s*[-\s]?\s*([a-z])\s*\)?\s*(\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?)", + # Pattern: "1-) 3 valores" or "1) 3 valores" (no subitem) + r"(\d+)\s*[-\s]?\s*\)?\s*(\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?)", + # Pattern: "2-) 4 valores" with explicit dash + r"(\d+)\s*-\s*\)\s*(\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?)", + # Pattern: "5-a) 2,5 valores 5-b) 2,5 
valores" - captures with subitem + r"(\d+)\s*-\s*([a-z])\s*\)\s*(\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?)", + ], + # Image indicators + "image_indicator": [ + r"(?:figura|gráfico|tabela|diagrama|imagem|graph|table|figure|image)", + r"(?:veja|observe|analise|considere)\s+(?:a|o)\s+(?:figura|gráfico|tabela)", + r"(?:na\s+)?(?:figura|gráfico|tabela)\s+(?:abaixo|seguinte|acima)", + ], + # Coordination signature patterns + "coordination": [ + r"(?:a\s+)?coordena[çc][ãa]o", + r"coordenador(?:a)?", + r"assinatura", + r"(?:fim\s+da?\s+prova)", + ], + } + + # Keywords for question type inference + QUESTION_TYPE_KEYWORDS = { + QuestionType.MULTIPLA_ESCOLHA: [ + "escolha", "assinale", "marque", "alternativa", "opção", + "select", "choose", "mark", "circle", "option", + ], + QuestionType.DISSERTATIVA: [ + "resolva", "resolve", "calcule", "calcular", "determine", "determina", + "justifique", "explique", "desenvolva", "comente", "discuta", + "demonstre", "prove", "mostre", "encontre", "simplifique", + "analise", "compare", "descreva", "fundamente", + ], + } + + # Subject name corrections for OCR errors + SUBJECT_CORRECTIONS = { + "matematica": "Matemática", + "matemática": "Matemática", + "fisica": "Física", + "física": "Física", + "fisíca": "Física", + "fìsica": "Física", + "quimica": "Química", + "química": "Química", + "quìmica": "Química", + "biologia": "Biologia", + "portugues": "Português", + "português": "Português", + "ingles": "Inglês", + "inglês": "Inglês", + "frances": "Francês", + "francês": "Francês", + "historia": "História", + "história": "História", + "geografia": "Geografia", + "filosofia": "Filosofia", + "educacao": "Educação", + "educação": "Educação", + "desenho": "Desenho", + "geometria": "Geometria", + "informatica": "Informática", + "informática": "Informática", + "economia": "Economia", + "sociologia": "Sociologia", + "psicologia": "Psicologia", + } + + def __init__( + self, + min_confidence_threshold: float = 0.5, + low_confidence_threshold: float = 
0.8, + ): + """ + Initialize the postprocessor. + + Args: + min_confidence_threshold: Minimum confidence to include results + low_confidence_threshold: Threshold below which to add warnings + """ + self.min_confidence_threshold = min_confidence_threshold + self.low_confidence_threshold = low_confidence_threshold + + def process( + self, + text_blocks: List[TextBlock], + page_count: int = 1, + ) -> Tuple[ExtractedMetadata, List[ExtractedQuestion], List[ImageToUpload], List[UnmappedContent], List[Warning]]: + """ + Process OCR text blocks into structured data. + + Args: + text_blocks: List of text blocks from OCR + page_count: Number of pages in the document + + Returns: + Tuple of (metadata, questions, images_to_upload, unmapped_content, warnings) + """ + warnings = [] + unmapped = [] + images_to_upload = [] + + # Sort blocks by page and position + sorted_blocks = sorted(text_blocks, key=lambda b: (b.page_index, b.bbox[1], b.bbox[0])) + + # Combine all text for pattern matching + full_text = " ".join(b.text for b in sorted_blocks) + + # Extract metadata from header blocks + metadata = self._extract_metadata(sorted_blocks, full_text) + + # Clean up null/empty fields to avoid persisting empty data + metadata = self._clean_null_fields(metadata) + + warnings.extend(self._generate_metadata_warnings(metadata)) + + # Parse cotação block if present + cotacao_map = self._parse_cotacao_block(full_text) + + # Segment and extract questions + questions = self._extract_questions(sorted_blocks, cotacao_map) + warnings.extend(self._generate_question_warnings(questions)) + + # Calculate total max score from questions or cotação map + total_score = sum(q.cotacao for q in questions if q.cotacao is not None) + if total_score > 0: + metadata.total_max_score = MetadataField(round(total_score, 1), 0.9) + elif cotacao_map: + # If questions don't have cotação yet, sum from cotação map + total_from_map = sum(cotacao_map.values()) + if total_from_map > 0: + metadata.total_max_score = 
MetadataField(round(total_from_map, 1), 0.85) + + # Detect images to upload + images_to_upload = self._detect_images_to_upload(metadata, questions, sorted_blocks, full_text) + + # Collect unmapped content + unmapped = self._collect_unmapped(sorted_blocks, metadata, questions) + + return metadata, questions, images_to_upload, unmapped, warnings + + def _extract_metadata(self, blocks: List[TextBlock], full_text: str) -> ExtractedMetadata: + """Extract document metadata from text blocks.""" + metadata = ExtractedMetadata() + full_text_lower = full_text.lower() + + # Extract school year - try multiple approaches + school_year_found = False + for pattern in self.PATTERNS["school_year"]: + match = re.search(pattern, full_text, re.IGNORECASE) + if match: + start_year = int(match.group(1)) + end_year_str = match.group(2) + if len(end_year_str) == 2: + end_year = int(str(start_year)[:2] + end_year_str) + else: + end_year = int(end_year_str) + + # Validate years are reasonable (2000-2100) + if 2000 <= start_year <= 2100 and 2000 <= end_year <= 2100: + confidence = self._estimate_confidence_from_match(match, full_text, blocks) + metadata.school_year_start = MetadataField(start_year, confidence) + metadata.school_year_end = MetadataField(end_year, confidence) + metadata.school_year = MetadataField(f"{start_year}/{end_year}", confidence) + school_year_found = True + break + + # Fallback: look in header blocks specifically + if not school_year_found: + header_text = " ".join(b.text for b in blocks[:20]) + year_patterns = [ + r"(\d{4})\s*[/-]\s*(\d{4})", + r"(\d{4})\s*/\s*(\d{2,4})", + ] + for pattern in year_patterns: + match = re.search(pattern, header_text) + if match: + start_year = int(match.group(1)) + end_year_str = match.group(2) + if len(end_year_str) == 2: + end_year = int(str(start_year)[:2] + end_year_str) + else: + end_year = int(end_year_str) + + if 2000 <= start_year <= 2100 and 2000 <= end_year <= 2100: + metadata.school_year_start = MetadataField(start_year, 0.8) 
+ metadata.school_year_end = MetadataField(end_year, 0.8) + metadata.school_year = MetadataField(f"{start_year}/{end_year}", 0.8) + break + + # Extract term + for pattern in self.PATTERNS["term"]: + match = re.search(pattern, full_text, re.IGNORECASE) + if match: + term_value = match.group(1) + term_map = { + "i": "1", "ii": "2", "iii": "3", "iv": "4", + } + normalized = term_map.get(term_value.lower(), term_value) + confidence = self._estimate_confidence_from_match(match, full_text, blocks) + metadata.term = MetadataField(f"{normalized}º Trimestre", confidence) + break + + # Extract subject (try multiple approaches) + subject_found = False + for pattern in self.PATTERNS["subject"]: + match = re.search(pattern, full_text, re.IGNORECASE) + if match: + subject = match.group(1).strip() + # Skip if it's just "Recurso" - we want the actual subject + if subject.lower() == "recurso": + continue + # Clean and normalize subject name + subject = self._normalize_subject_name(subject) + confidence = self._estimate_confidence_from_match(match, full_text, blocks) + metadata.subject_name = MetadataField(subject, confidence) + metadata.subject = MetadataField(subject, confidence) + subject_found = True + break + + # If not found, try to find subject keywords in text + if not subject_found: + for subject_key, subject_value in self.SUBJECT_CORRECTIONS.items(): + if subject_key in full_text_lower: + metadata.subject_name = MetadataField(subject_value, 0.75) + metadata.subject = MetadataField(subject_value, 0.75) + break + + # Extract duration + for pattern in self.PATTERNS["duration"]: + match = re.search(pattern, full_text, re.IGNORECASE) + if match: + minutes = int(match.group(1)) + confidence = self._estimate_confidence_from_match(match, full_text, blocks) + metadata.duration_minutes = MetadataField(minutes, confidence) + break + + # Extract variant/series + for pattern in self.PATTERNS["variant"]: + match = re.search(pattern, full_text, re.IGNORECASE) + if match: + variant = 
match.group(1).upper() + confidence = self._estimate_confidence_from_match(match, full_text, blocks) + metadata.variant = MetadataField(variant, confidence) + break + + # Extract class/grade - try multiple approaches + grade_found = False + + # First, try patterns on full text + for pattern in self.PATTERNS["class"]: + match = re.search(pattern, full_text, re.IGNORECASE) + if match: + grade = match.group(1) + # Validate grade is reasonable (1-13 for Angola) + try: + grade_int = int(grade) + if 1 <= grade_int <= 13: + confidence = self._estimate_confidence_from_match(match, full_text, blocks) + metadata.class_grade = MetadataField(str(grade_int), confidence) + metadata.class_info = MetadataField(f"{grade_int}ª Classe", confidence) + grade_found = True + break + except ValueError: + continue + + # Fallback 1: look for common class patterns in first blocks (header area) + if not grade_found: + header_text = " ".join(b.text for b in blocks[:20]) + # Multiple patterns for class extraction + class_patterns = [ + r"(\d{1,2})\s*[ºª°aᵃ]?\s*classe", + r"(\d{1,2})\s*[ºª°aᵃ]\s*cl\b", + r"(\d{1,2})a\s+classe", + r"(\d{1,2})ª\s+classe", + # Pattern like "12a Classe Ano Lectivo" + r"(\d{1,2})[aª]\s*classe\s*ano", + ] + for pattern in class_patterns: + class_match = re.search(pattern, header_text, re.IGNORECASE) + if class_match: + try: + grade_int = int(class_match.group(1)) + if 1 <= grade_int <= 13: + metadata.class_grade = MetadataField(str(grade_int), 0.8) + metadata.class_info = MetadataField(f"{grade_int}ª Classe", 0.8) + grade_found = True + break + except ValueError: + continue + + # Fallback 2: look for class in instructions text (common in Angolan exams) + if not grade_found and metadata.instructions.value: + instr_text = metadata.instructions.value + for pattern in class_patterns: + class_match = re.search(pattern, instr_text, re.IGNORECASE) + if class_match: + try: + grade_int = int(class_match.group(1)) + if 1 <= grade_int <= 13: + metadata.class_grade = 
MetadataField(str(grade_int), 0.75) + metadata.class_info = MetadataField(f"{grade_int}ª Classe", 0.75) + grade_found = True + break + except ValueError: + continue + + # Fallback 3: search for standalone pattern like "12 Classe" anywhere + if not grade_found: + standalone_pattern = r"\b(\d{1,2})\s*classe\b" + standalone_match = re.search(standalone_pattern, full_text, re.IGNORECASE) + if standalone_match: + try: + grade_int = int(standalone_match.group(1)) + if 1 <= grade_int <= 13: + metadata.class_grade = MetadataField(str(grade_int), 0.7) + metadata.class_info = MetadataField(f"{grade_int}ª Classe", 0.7) + except ValueError: + pass + + # Extract course - be more careful to get clean value + # First try explicit "CURSO:" pattern + course_match = re.search(r"curso\s*:\s*([A-Za-záàâãéèêíïóôõöúçñÁÀÂÃÉÈÊÍÏÓÔÕÖÚÇÑ\s]+?)(?:\s{2,}|$|\n|SEGIEM|N[°º])", full_text, re.IGNORECASE) + if course_match: + course = course_match.group(1).strip().upper() + # Clean course name - remove trailing garbage + course = re.sub(r'\s+', ' ', course) + # Only take first meaningful word(s) + course_words = course.split() + if len(course_words) > 0: + # Common course names + valid_courses = ['TODOS', 'CIENCIAS', 'HUMANIDADES', 'LETRAS', 'ARTES', + 'ECONOMIA', 'INFORMATICA', 'PEDAGOGIA', 'AGRONOMIA', 'GERAL'] + # Check if first word is a valid course + if course_words[0] in valid_courses: + course = course_words[0] + # Check if "TODOS" appears anywhere + elif 'TODOS' in course_words: + course = 'TODOS' + elif len(course_words) <= 2: + course = ' '.join(course_words) + else: + # Take only first meaningful word, skip garbage + for word in course_words: + if word in valid_courses or len(word) > 3: + course = word + break + confidence = 0.85 + metadata.course_name = MetadataField(course, confidence) + metadata.course = MetadataField(course, confidence) + + # Extract exam type + for pattern in self.PATTERNS["exam_type"]: + match = re.search(pattern, full_text, re.IGNORECASE) + if match: + exam_type 
= match.group(1).strip().title() + # Normalize exam types - more specific normalization + exam_type_lower = exam_type.lower() + if "prova de exame de" in exam_type_lower: + # Extract subject from "prova de exame de matemática" + exam_type = "Prova de Exame" + elif "prova de recurso" in exam_type_lower: + exam_type = "Prova de Recurso" + elif "exame de recurso" in exam_type_lower: + exam_type = "Exame de Recurso" + elif "exame de época" in exam_type_lower or "época" in exam_type_lower: + exam_type = "Exame de Época" + elif "prova de exame" in exam_type_lower or "exame" in exam_type_lower: + exam_type = "Prova de Exame" + elif "avaliação" in exam_type_lower: + exam_type = exam_type # Keep as is (Avaliação Periódica, etc.) + elif "teste" in exam_type_lower: + exam_type = exam_type # Keep as is + confidence = self._estimate_confidence_from_match(match, full_text, blocks) + metadata.exam_type = MetadataField(exam_type, confidence) + break + + # Build title from extracted components + title = self._build_title(metadata) + if title: + metadata.title = MetadataField(title, 0.85) + + # Extract instructions (look for instruction keywords) + instruction_keywords = ["leia", "responda", "atenção", "instruções", "coloque", "forma clara"] + for block in blocks[:25]: + if any(kw in block.text.lower() for kw in instruction_keywords): + # Check if this looks like an instruction block + if len(block.text) > 30 and not self._detect_question_number(block.text): + metadata.instructions = MetadataField(block.text.strip(), block.confidence) + break + + return metadata + + def _normalize_subject_name(self, subject: str) -> str: + """Normalize and correct subject name.""" + if not subject: + return subject + + subject_lower = subject.lower().strip() + + # Remove common OCR noise + subject_lower = re.sub(r'\s+', ' ', subject_lower) + subject_lower = subject_lower.replace('í', 'i').replace('ì', 'i') + + # Check for exact matches in corrections + if subject_lower in self.SUBJECT_CORRECTIONS: + 
return self.SUBJECT_CORRECTIONS[subject_lower] + + # Check for partial matches (more strict - at word boundaries) + for key, value in self.SUBJECT_CORRECTIONS.items(): + if re.search(rf'\b{re.escape(key)}\b', subject_lower): + return value + + # Return with proper capitalization + return subject.strip().title() + + def _build_title(self, metadata: ExtractedMetadata) -> Optional[str]: + """Build a title from extracted metadata components.""" + parts = [] + + if metadata.exam_type.value: + parts.append(metadata.exam_type.value) + + if metadata.subject_name.value: + if not parts or "de" not in parts[-1].lower(): + parts.append(f"de {metadata.subject_name.value}") + else: + parts.append(metadata.subject_name.value) + + if metadata.class_grade.value: + parts.append(f"{metadata.class_grade.value}ª Classe") + + if metadata.variant.value: + parts.append(f"- Série {metadata.variant.value}") + + if metadata.school_year_start.value and metadata.school_year_end.value: + parts.append(f"- {metadata.school_year_start.value}/{metadata.school_year_end.value}") + + return " ".join(parts) if parts else None + + def _parse_cotacao_block(self, full_text: str) -> Dict[str, float]: + """ + Parse cotação/scoring block to map question numbers to scores. 
+ + Handles Angolan exam format like: + "Cotação 1-a) 3 valores 2-) 4 valores 3-a) 2,5 valores 3-b) 2,5 valores 4-) 3 valores 5-a) 2,5 valores 5-b) 2,5 valores" + + Also handles inline format like: + "COTACAO:1) a - 4 V,b - 4 V/2) 5 V/ 3) 4 V/4) 3 V" + """ + cotacao_map = {} + + # First, try to find cotação block + cotacao_text = None + for pattern in self.PATTERNS["cotacao_block"]: + match = re.search(pattern, full_text, re.IGNORECASE | re.DOTALL) + if match: + cotacao_text = match.group(1) + break + + if not cotacao_text: + # Try to find cotação anywhere in text + cotacao_match = re.search(r"cotaç[aã]o\s+(.+?)(?:\.|A\s+COORDENA|$)", full_text, re.IGNORECASE | re.DOTALL) + if cotacao_match: + cotacao_text = cotacao_match.group(1) + + # Also try inline format: "COTACAO:1) a - 4 V,b - 4 V/2) 5 V..." + if not cotacao_text: + inline_match = re.search(r"COTACAO\s*:\s*(.+?)(?:\s*$|\n\n)", full_text, re.IGNORECASE) + if inline_match: + cotacao_text = inline_match.group(1) + + if not cotacao_text: + return cotacao_map + + # Clean up the cotação text + cotacao_text = cotacao_text.replace('\n', ' ').replace('\r', ' ') + cotacao_text = re.sub(r'\s+', ' ', cotacao_text) + + # Parse using multiple patterns for flexibility + # Pattern 1: "1-a) 3 valores" - question with subitem + pattern1 = r"(\d+)\s*-?\s*([a-z])\s*\)?\s*(\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?)" + for match in re.finditer(pattern1, cotacao_text, re.IGNORECASE): + q_num = match.group(1) + subitem = match.group(2).lower() + score = float(match.group(3).replace(",", ".")) + key = f"{q_num}{subitem}" + cotacao_map[key] = score + + # Pattern 2: "2-) 4 valores" - question without subitem (just dash and parenthesis) + pattern2 = r"(\d+)\s*-\s*\)\s*(\d+(?:[.,]\d+)?)\s*(?:valores?|pontos?|pts?)" + for match in re.finditer(pattern2, cotacao_text, re.IGNORECASE): + q_num = match.group(1) + score = float(match.group(2).replace(",", ".")) + # Only add if not already mapped with subitem + if q_num not in cotacao_map: + 
cotacao_map[q_num] = score + + # Pattern 3: "4-) 3 valores" or "4) 3 valores" - standalone questions + pattern3 = r"(? List[ExtractedQuestion]: + """Extract and structure questions from text blocks.""" + questions = [] + current_question = None + question_text_parts = [] + current_subitems = [] + current_subitems_content = [] + current_subitem_label = None + current_subitem_text_parts = [] + + for block in blocks: + text = block.text.strip() + + # Skip empty blocks + if not text: + continue + + # Skip cotação blocks + if re.match(r"^\s*cotaç[aã]o\s", text, re.IGNORECASE): + continue + + # Skip coordination/signature blocks + if self._is_coordination_block(text): + continue + + # Check if this block starts a new question + question_info = self._detect_question_number(text) + + if question_info is not None: + # Save current subitem if exists + if current_subitem_label and current_subitem_text_parts: + subitem_text = self._clean_text(" ".join(current_subitem_text_parts)) + subitem_cotacao = self._extract_inline_cotacao(subitem_text) + if subitem_cotacao: + subitem_text = self._remove_inline_cotacao(subitem_text) + current_subitems_content.append(SubitemContent( + label=current_subitem_label, + text=self._clean_footer_garbage(subitem_text), + cotacao=subitem_cotacao, + )) + + # Save previous question if exists + if current_question is not None: + current_question.text = self._clean_text(" ".join(question_text_parts)) + current_question.subitems = current_subitems + current_question.subitems_content = current_subitems_content + self._finalize_question(current_question, cotacao_map) + questions.append(current_question) + + q_num, remainder = question_info + + # Start new question + current_question = ExtractedQuestion( + number=str(q_num), + text="", + text_confidence=block.confidence, + question_type=QuestionType.UNKNOWN, + question_type_confidence=0.0, + page_index=block.page_index, + start_y=block.bbox[1], + end_y=block.bbox[3], + ) + question_text_parts = [remainder] 
if remainder else [] + current_subitems = [] + current_subitems_content = [] + current_subitem_label = None + current_subitem_text_parts = [] + + # Check for image indicators + has_image, image_desc = self._detect_image_reference(text) + if has_image: + current_question.has_image = True + current_question.image_description = image_desc + + elif current_question is not None: + # Check if this is a subitem start + subitem_info = self._detect_subitem_with_text(text) + if subitem_info: + subitem_label, subitem_remainder = subitem_info + + # Save previous subitem if exists + if current_subitem_label and current_subitem_text_parts: + subitem_text = self._clean_text(" ".join(current_subitem_text_parts)) + subitem_cotacao = self._extract_inline_cotacao(subitem_text) + if subitem_cotacao: + subitem_text = self._remove_inline_cotacao(subitem_text) + current_subitems_content.append(SubitemContent( + label=current_subitem_label, + text=self._clean_footer_garbage(subitem_text), + cotacao=subitem_cotacao, + )) + + # Start new subitem + if subitem_label not in current_subitems: + current_subitems.append(subitem_label) + current_subitem_label = subitem_label + current_subitem_text_parts = [subitem_remainder] if subitem_remainder else [] + + # Still add full text to question + question_text_parts.append(text) + + # Check if this is an option (for multiple choice) + elif self._is_option_block(text): + option = self._detect_option(text) + if option: + if current_question.options is None: + current_question.options = [] + current_question.options.append(ExtractedOption( + option_label=option[0], + option_text=option[1], + confidence=block.confidence, + )) + else: + # Add to question text + question_text_parts.append(text) + # Also add to current subitem if we're in one + if current_subitem_label: + current_subitem_text_parts.append(text) + + # Update end position + current_question.end_y = max(current_question.end_y, block.bbox[3]) + + # Check for image references in this block + 
has_image, image_desc = self._detect_image_reference(text) + if has_image and not current_question.has_image: + current_question.has_image = True + current_question.image_description = image_desc + + # Save last subitem + if current_subitem_label and current_subitem_text_parts: + subitem_text = self._clean_text(" ".join(current_subitem_text_parts)) + subitem_cotacao = self._extract_inline_cotacao(subitem_text) + if subitem_cotacao: + subitem_text = self._remove_inline_cotacao(subitem_text) + current_subitems_content.append(SubitemContent( + label=current_subitem_label, + text=self._clean_footer_garbage(subitem_text), + cotacao=subitem_cotacao, + )) + + # Save last question + if current_question is not None: + current_question.text = self._clean_text(" ".join(question_text_parts)) + current_question.subitems = current_subitems + current_question.subitems_content = current_subitems_content + self._finalize_question(current_question, cotacao_map) + questions.append(current_question) + + return questions + + def _finalize_question(self, question: ExtractedQuestion, cotacao_map: Dict[str, float]): + """Finalize question with type inference and cotação lookup.""" + # Infer question type + question.question_type, question.question_type_confidence = self._infer_question_type(question) + + # Look up cotação - try multiple strategies + cotacao_found = False + + # Strategy 1: Direct question number lookup from cotação block + if question.number in cotacao_map: + question.cotacao = cotacao_map[question.number] + question.cotacao_confidence = 0.9 + cotacao_found = True + + # Strategy 2: Sum subitems cotação from cotação block + if not cotacao_found: + total_cotacao = 0.0 + found_subitems = False + + # Try with detected subitems + for subitem in question.subitems: + # Clean subitem: "a)" -> "a" + clean_subitem = subitem.replace(')', '').replace('(', '').lower().strip() + key = f"{question.number}{clean_subitem}" + if key in cotacao_map: + total_cotacao += cotacao_map[key] + 
found_subitems = True + + # If no subitems detected, try common letters + if not found_subitems: + for letter in ['a', 'b', 'c', 'd', 'e']: + key = f"{question.number}{letter}" + if key in cotacao_map: + total_cotacao += cotacao_map[key] + found_subitems = True + + if found_subitems: + question.cotacao = total_cotacao + question.cotacao_confidence = 0.85 + cotacao_found = True + + # Strategy 3: Sum cotação from subitems_content (extracted inline) + if not cotacao_found and question.subitems_content: + total_cotacao = 0.0 + found_any = False + for subitem in question.subitems_content: + if subitem.cotacao is not None: + total_cotacao += subitem.cotacao + found_any = True + if found_any: + question.cotacao = total_cotacao + question.cotacao_confidence = 0.85 + cotacao_found = True + + # Strategy 4: Look for inline cotação in question text (e.g., "(3V)", "(4 valores)") + if not cotacao_found and question.text: + inline_cotacao = self._extract_inline_cotacao(question.text) + if inline_cotacao: + question.cotacao = inline_cotacao + question.cotacao_confidence = 0.8 + # Remove the cotação from question text + question.text = self._remove_inline_cotacao(question.text) + cotacao_found = True + + # Clean question text of any remaining inline cotação + if question.text: + question.text = self._remove_inline_cotacao(question.text) + + def _detect_question_number(self, text: str) -> Optional[Tuple[int, str]]: + """Detect if text starts with a question number. Returns (number, remainder) or None.""" + text = text.strip() + + # Skip blocks that look like class/grade indicators (e.g., "12ª Classe", "12a Classe") + class_pattern = r"^\d{1,2}\s*[ºª°a]?\s*(?:classe|class|ano|grade)" + if re.match(class_pattern, text, re.IGNORECASE): + return None + + # Skip blocks that look like header metadata (contain "Ano Lectivo", "Duração", etc.) 
+ header_keywords = [ + r"ano\s*let[ií]vo", r"ano\s*lect[ií]vo", r"duração", r"duracao", + r"curso", r"proc\.", r"nome:", r"n[°º]?\s*proc" + ] + text_lower = text.lower() + for kw in header_keywords: + if re.search(kw, text_lower): + return None + + for pattern in self.PATTERNS["question_number"]: + match = re.match(pattern, text, re.IGNORECASE) + if match: + num_str = match.group(1) + try: + num = int(num_str) + remainder = text[match.end():].strip() + + # Additional validation: question numbers should be reasonable (1-50) + if num < 1 or num > 50: + continue + + # Skip if the remainder looks like metadata (class, year, etc.) + if remainder: + remainder_lower = remainder.lower() + if any(kw in remainder_lower for kw in ['classe', 'class', 'ano lectivo', 'ano letivo', 'duração', 'duracao']): + continue + + return (num, remainder) + except ValueError: + continue + + return None + + def _detect_subitem(self, text: str) -> Optional[str]: + """Detect if text starts with a subitem (a), b), etc.)""" + text = text.strip() + + for pattern in self.PATTERNS["subitem"]: + match = re.match(pattern, text, re.IGNORECASE) + if match: + return f"{match.group(1)})" + + return None + + def _detect_subitem_with_text(self, text: str) -> Optional[Tuple[str, str]]: + """Detect subitem and return (label, remainder text).""" + text = text.strip() + + # Patterns for subitems with their content + patterns = [ + r"^\(?([a-z])\)?[.):]\s*(.*)$", # a) text, a. 
text, (a) text + r"^([a-z])\s*[-–—]\s*(.*)$", # a - text, a – text + ] + + for pattern in patterns: + match = re.match(pattern, text, re.IGNORECASE) + if match: + label = f"{match.group(1).lower()})" + remainder = match.group(2).strip() + return (label, remainder) + + return None + + def _extract_inline_cotacao(self, text: str) -> Optional[float]: + """Extract inline cotação like (3V), (2,5V), (4 valores) from text.""" + if not text: + return None + + for pattern in self.PATTERNS.get("inline_cotacao", []): + match = re.search(pattern, text, re.IGNORECASE) + if match: + value_str = match.group(1).replace(",", ".") + try: + return float(value_str) + except ValueError: + continue + + # Fallback patterns + fallback_patterns = [ + r"\((\d+(?:[.,]\d+)?)\s*[Vv]\)", + r"\((\d+(?:[.,]\d+)?)\s*valores?\)", + r"\((\d+(?:[.,]\d+)?)\s*pontos?\)", + r";(\d+(?:[.,]\d+)?)\s*[Vv]\)", # Pattern like ";2V)" + ] + for pattern in fallback_patterns: + match = re.search(pattern, text, re.IGNORECASE) + if match: + value_str = match.group(1).replace(",", ".") + try: + return float(value_str) + except ValueError: + continue + + return None + + def _remove_inline_cotacao(self, text: str) -> str: + """Remove inline cotação from text.""" + if not text: + return text + + patterns = [ + r"\s*\(\d+(?:[.,]\d+)?\s*[Vv]\)", + r"\s*\(\d+(?:[.,]\d+)?\s*valores?\)", + r"\s*\(\d+(?:[.,]\d+)?\s*pontos?\)", + r"\s*\[\d+(?:[.,]\d+)?\s*[Vv]\]", + r";\s*\d+(?:[.,]\d+)?\s*[Vv]\)", # Pattern like ";2V)" + ] + for pattern in patterns: + text = re.sub(pattern, "", text, flags=re.IGNORECASE) + + return text.strip() + + def _clean_footer_garbage(self, text: str) -> str: + """Remove footer garbage like coordination signatures, website URLs, etc.""" + if not text: + return text + + # Remove common footer patterns + garbage_patterns = [ + r"A?\s*COORDENA[CÇ][AÃ]O.*$", + r"minttics\.gov\.ao.*$", + r"LUANDA[\s-]*ANGOLA.*$", + r"gov\.ao.*$", + r"\d+/E\d+/\d+.*$", + r"kaixa\d*.*$", + r"klvs.*$", + r"GOIK.*$", + 
r"b\s*=\s*N\.m\.I.*$", + r"A7IUANDA.*$", + r"\.1\.10\s+kaixa.*$", + ] + for pattern in garbage_patterns: + text = re.sub(pattern, "", text, flags=re.IGNORECASE) + + # Clean up extra whitespace + text = re.sub(r'\s+', ' ', text).strip() + + return text + + def _is_option_block(self, text: str) -> bool: + """Check if text looks like a multiple choice option.""" + text = text.strip() + for pattern in self.PATTERNS["option"]: + if re.match(pattern, text, re.IGNORECASE): + return True + return False + + def _detect_option(self, text: str) -> Optional[Tuple[str, str]]: + """Detect if text is a question option.""" + text = text.strip() + + for pattern in self.PATTERNS["option"]: + match = re.match(pattern, text, re.IGNORECASE) + if match: + label = match.group(1).upper() + option_text = match.group(2).strip() + return (label, option_text) + + return None + + def _detect_image_reference(self, text: str) -> Tuple[bool, Optional[str]]: + """Detect if text contains image/figure references.""" + text_lower = text.lower() + + for pattern in self.PATTERNS["image_indicator"]: + match = re.search(pattern, text_lower) + if match: + # Try to extract a description + desc = self._extract_image_description(text) + return True, desc + + # Also check for mathematical expressions that might be images + math_indicators = [ + r"\\int", r"\\frac", r"\\sqrt", r"\\sum", r"\\lim", + r"\^{", r"_{", r"→", r"∫", r"√", + ] + for indicator in math_indicators: + if indicator in text: + return True, "Expressão matemática complexa" + + return False, None + + def _extract_image_description(self, text: str) -> str: + """Extract a description for an image reference.""" + # Look for descriptive text near the image indicator + patterns = [ + r"(?:figura|gráfico|tabela)\s+(?:que\s+)?(?:mostra|representa|ilustra)\s+(.+?)(?:\.|$)", + r"(?:observe|analise|considere)\s+(.+?)(?:\.|$)", + ] + + for pattern in patterns: + match = re.search(pattern, text, re.IGNORECASE) + if match: + return match.group(1).strip() + 
+ # Default description + if "gráfico" in text.lower(): + return "Gráfico" + elif "tabela" in text.lower(): + return "Tabela" + elif "figura" in text.lower(): + return "Figura" + + return "Imagem ou expressão visual" + + def _is_coordination_block(self, text: str) -> bool: + """Check if text is a coordination/signature block.""" + text_lower = text.lower() + + for pattern in self.PATTERNS["coordination"]: + if re.search(pattern, text_lower): + return True + + return False + + def _detect_images_to_upload( + self, + metadata: ExtractedMetadata, + questions: List[ExtractedQuestion], + blocks: List[TextBlock], + full_text: str + ) -> List[ImageToUpload]: + """Detect regions that should be uploaded as images.""" + images = [] + + # Build base filename + base_name = "prova" + if metadata.subject_name.value: + base_name = f"prova-{metadata.subject_name.value.lower().replace(' ', '-')}" + if metadata.school_year_start.value and metadata.school_year_end.value: + base_name = f"{base_name}-{metadata.school_year_start.value}-{metadata.school_year_end.value}" + if metadata.variant.value: + base_name = f"{base_name}-serie-{metadata.variant.value.lower()}" + + # Check for header/logo region + if blocks and len(blocks) > 5: + header_text = " ".join(b.text for b in blocks[:5]) + if any(kw in header_text.lower() for kw in ["república", "angola", "ministério", "governo", "gabinete"]): + images.append(ImageToUpload( + suggested_filename=f"{base_name}-cabecalho.png", + description="Cabeçalho oficial com brasão/logo institucional", + region="cabecalho", + page_index=0, + )) + + # Add images for questions with visual content + for question in questions: + if question.has_image: + images.append(ImageToUpload( + suggested_filename=f"{base_name}-questao-{question.number}.png", + description=question.image_description or f"Imagem da questão {question.number}", + region=f"questao_{question.number}", + page_index=question.page_index, + )) + + # Check for coordination/signature at footer + for 
pattern in self.PATTERNS["coordination"]: + if re.search(pattern, full_text.lower()): + images.append(ImageToUpload( + suggested_filename=f"{base_name}-assinatura-coordenacao.png", + description="Assinatura da coordenação no rodapé da prova com texto A COORDENAÇÃO", + region="rodape", + page_index=len(set(b.page_index for b in blocks)) - 1 if blocks else 0, + )) + break + + return images + + def _infer_question_type(self, question: ExtractedQuestion) -> Tuple[QuestionType, float]: + """Infer question type based on text and options.""" + text_lower = question.text.lower() + + # If has options, it's multiple choice + if question.options and len(question.options) > 0: + return QuestionType.MULTIPLA_ESCOLHA, 0.95 + + # Check keywords for question type + for q_type, keywords in self.QUESTION_TYPE_KEYWORDS.items(): + matches = sum(1 for kw in keywords if kw in text_lower) + if matches > 0: + confidence = min(0.7 + (matches * 0.1), 0.95) + return q_type, confidence + + # Default to dissertativa for math/science exams + return QuestionType.DISSERTATIVA, 0.7 + + def _estimate_confidence_from_match( + self, + match: re.Match, + full_text: str, + blocks: List[TextBlock], + ) -> float: + """Estimate confidence for a regex match based on surrounding text blocks.""" + match_text = match.group(0) + base_confidence = 0.85 + + # Adjust based on block confidence + for block in blocks: + if match_text in block.text: + base_confidence = (base_confidence + block.confidence) / 2 + break + + return base_confidence + + def _clean_text(self, text: str) -> str: + """Clean and normalize extracted text.""" + # Remove extra whitespace + text = re.sub(r"\s+", " ", text).strip() + + # Remove score annotations + for pattern in self.PATTERNS["cotacao"]: + text = re.sub(pattern, "", text, flags=re.IGNORECASE) + + # Remove inline cotação blocks (e.g., "COTACAO:1) a - 4 V,b - 4 V/2) 5 V...") + text = re.sub(r"COTACAO\s*:\s*[^.]*(?:\.|$)", "", text, flags=re.IGNORECASE) + text = 
re.sub(r"COTAÇÃO\s*:\s*[^.]*(?:\.|$)", "", text, flags=re.IGNORECASE) + + return text.strip() + + def _collect_unmapped( + self, + blocks: List[TextBlock], + metadata: ExtractedMetadata, + questions: List[ExtractedQuestion], + ) -> List[UnmappedContent]: + """Collect text blocks that weren't mapped to metadata or questions.""" + # For simplicity, return empty list - full implementation would track used blocks + return [] + + def _generate_metadata_warnings(self, metadata: ExtractedMetadata) -> List[Warning]: + """Generate warnings for low-confidence metadata fields.""" + warnings = [] + + fields = [ + ("schoolYearStart", metadata.school_year_start), + ("schoolYearEnd", metadata.school_year_end), + ("subjectName", metadata.subject_name), + ("classGrade", metadata.class_grade), + ("examType", metadata.exam_type), + ] + + for field_name, field_value in fields: + if field_value.value is not None and field_value.confidence < self.low_confidence_threshold: + warnings.append(Warning( + code="LOW_CONFIDENCE", + field=field_name, + confidence=field_value.confidence, + )) + + return warnings + + def _generate_question_warnings(self, questions: List[ExtractedQuestion]) -> List[Warning]: + """Generate warnings for low-confidence questions.""" + warnings = [] + + for question in questions: + if question.confidence < self.low_confidence_threshold: + warnings.append(Warning( + code="LOW_CONFIDENCE", + field=f"question_{question.number}", + confidence=question.confidence, + )) + + if question.question_type == QuestionType.UNKNOWN: + warnings.append(Warning( + code="UNKNOWN_QUESTION_TYPE", + field=f"question_{question.number}", + confidence=question.question_type_confidence, + )) + + return warnings + + + def _clean_null_fields(self, metadata: ExtractedMetadata) -> ExtractedMetadata: + """ + Clean up metadata fields that have no value or low confidence. + This prevents persisting empty/null data to the database. 
+ """ + # Fields that should not be persisted if null/empty + # We keep the structure but ensure confidence is 0 for null values + + # For fields with None value, ensure confidence is 0 + if metadata.school_year_start.value is None: + metadata.school_year_start = MetadataField(None, 0.0) + if metadata.school_year_end.value is None: + metadata.school_year_end = MetadataField(None, 0.0) + if metadata.class_grade.value is None: + metadata.class_grade = MetadataField(None, 0.0) + if metadata.class_info.value is None: + metadata.class_info = MetadataField(None, 0.0) + if metadata.course_name.value is None: + metadata.course_name = MetadataField(None, 0.0) + if metadata.subject_name.value is None: + metadata.subject_name = MetadataField(None, 0.0) + if metadata.exam_type.value is None: + metadata.exam_type = MetadataField(None, 0.0) + if metadata.duration_minutes.value is None: + metadata.duration_minutes = MetadataField(None, 0.0) + if metadata.variant.value is None: + metadata.variant = MetadataField(None, 0.0) + if metadata.total_max_score.value is None: + metadata.total_max_score = MetadataField(None, 0.0) + + # Clean course name if it contains garbage + if metadata.course_name.value: + course = metadata.course_name.value + # If course is just "DE" or similar garbage, set to None + if course in ['DE', 'DA', 'DO', 'DAS', 'DOS', 'E', 'A', 'O']: + metadata.course_name = MetadataField(None, 0.0) + metadata.course = MetadataField(None, 0.0) + + return metadata + + +def normalize_text(text: str) -> str: + """ + Normalize text for consistent processing. 
+ + - Removes extra whitespace + - Normalizes quotes and dashes + - Fixes common OCR errors + """ + # Normalize whitespace + text = re.sub(r"\s+", " ", text).strip() + + # Normalize quotes + text = re.sub(r"[""„‟]", '"', text) + text = re.sub(r"[''‚‛]", "'", text) + + # Normalize dashes + text = re.sub(r"[–—−]", "-", text) + + # Common OCR fixes for Portuguese + ocr_fixes = { + r"12ª": "12ª", + r"12a": "12ª", + r"12°": "12ª", + r"Séria": "Série", + r"Sèrie": "Série", + r"Matematic[ao]": "Matemática", + } + + for pattern, replacement in ocr_fixes.items(): + text = re.sub(pattern, replacement, text, flags=re.IGNORECASE) + + return text + + +def detect_language(text: str) -> str: + """ + Detect the primary language of the text. + + Returns language code (pt, en, etc.) + """ + # Portuguese indicators + pt_words = ["prova", "exame", "classe", "ano", "duração", "valores", "questão", + "calcule", "determine", "resolva", "trimestre", "letivo"] + + # English indicators + en_words = ["exam", "class", "year", "duration", "points", "question", + "calculate", "determine", "solve", "term"] + + text_lower = text.lower() + pt_count = sum(1 for word in pt_words if word in text_lower) + en_count = sum(1 for word in en_words if word in text_lower) + + if pt_count > en_count: + return "pt" + elif en_count > pt_count: + return "en" + else: + return "pt" # Default to Portuguese for Angolan context diff --git a/services/ocr-service/app/ocr/preprocessing.py b/services/ocr-service/app/ocr/preprocessing.py index ac5e6be..740d647 100644 --- a/services/ocr-service/app/ocr/preprocessing.py +++ b/services/ocr-service/app/ocr/preprocessing.py @@ -1 +1,125 @@ # Image preprocessing for better OCR +import cv2 +import numpy as np +from PIL import Image + + +def preprocess_image(image: Image.Image) -> Image.Image: + """ + Preprocess image for better OCR results. + + Steps: + 1. Convert to grayscale + 2. Apply adaptive thresholding + 3. Denoise + 4. 
Deskew if needed + + Args: + image: PIL Image object + + Returns: + Preprocessed PIL Image + """ + # Convert PIL to OpenCV format + img_array = np.array(image) + + # Convert to grayscale if needed + if len(img_array.shape) == 3: + gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY) + else: + gray = img_array + + # Apply denoising + denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21) + + # Apply adaptive thresholding + thresh = cv2.adaptiveThreshold( + denoised, + 255, + cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY, + 11, + 2 + ) + + # Convert back to PIL + return Image.fromarray(thresh) + + +def deskew_image(image: Image.Image) -> Image.Image: + """ + Correct image skew/rotation. + + Args: + image: PIL Image object + + Returns: + Deskewed PIL Image + """ + img_array = np.array(image) + + if len(img_array.shape) == 3: + gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY) + else: + gray = img_array + + # Find edges + edges = cv2.Canny(gray, 50, 150, apertureSize=3) + + # Find lines using Hough transform + lines = cv2.HoughLinesP( + edges, 1, np.pi / 180, 100, + minLineLength=100, maxLineGap=10 + ) + + if lines is None: + return image + + # Calculate average angle + angles = [] + for line in lines: + x1, y1, x2, y2 = line[0] + angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi + if abs(angle) < 45: # Filter out vertical lines + angles.append(angle) + + if not angles: + return image + + avg_angle = np.median(angles) + + # Rotate image + (h, w) = img_array.shape[:2] + center = (w // 2, h // 2) + M = cv2.getRotationMatrix2D(center, avg_angle, 1.0) + rotated = cv2.warpAffine( + img_array, M, (w, h), + flags=cv2.INTER_CUBIC, + borderMode=cv2.BORDER_REPLICATE + ) + + return Image.fromarray(rotated) + + +def enhance_contrast(image: Image.Image) -> Image.Image: + """ + Enhance image contrast using CLAHE. 
+ + Args: + image: PIL Image object + + Returns: + Contrast-enhanced PIL Image + """ + img_array = np.array(image) + + if len(img_array.shape) == 3: + gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY) + else: + gray = img_array + + # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization) + clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + enhanced = clahe.apply(gray) + + return Image.fromarray(enhanced) diff --git a/services/ocr-service/env.example b/services/ocr-service/env.example new file mode 100644 index 0000000..b0e1468 --- /dev/null +++ b/services/ocr-service/env.example @@ -0,0 +1,96 @@ +# OCR Service Environment Configuration +# Copy this file to .env and adjust values as needed + +# ============================================================================= +# SERVICE CONFIGURATION +# ============================================================================= +SERVICE_NAME=ocr-service +SERVICE_VERSION=1.0.0 +ENVIRONMENT=development +DEBUG=true + +# ============================================================================= +# SERVER CONFIGURATION +# ============================================================================= +HOST=0.0.0.0 +PORT=8000 +WORKERS=1 + +# ============================================================================= +# OCR CONFIGURATION +# ============================================================================= +# Primary language for OCR (pt, en, es, fr, de, etc.) 
+OCR_LANG=pt + +# Use GPU acceleration (requires CUDA and paddlepaddle-gpu) +OCR_USE_GPU=false + +# Use angle classification for rotated text detection +OCR_USE_ANGLE_CLS=true + +# Show PaddleOCR internal logs +OCR_SHOW_LOG=false + +# Custom model directories (optional - leave empty for default models) +# OCR_DET_MODEL_DIR=/path/to/detection/model +# OCR_REC_MODEL_DIR=/path/to/recognition/model +# OCR_CLS_MODEL_DIR=/path/to/classification/model + +# ============================================================================= +# PROCESSING CONFIGURATION +# ============================================================================= +# Maximum image size in MB +MAX_IMAGE_SIZE_MB=20.0 + +# Maximum number of images per request +MAX_IMAGES_PER_REQUEST=10 + +# Timeout for OCR processing in seconds +PROCESSING_TIMEOUT_SECONDS=120 + +# Minimum confidence threshold for text extraction (0.0 - 1.0) +MIN_CONFIDENCE_THRESHOLD=0.5 + +# ============================================================================= +# IMAGE PREPROCESSING +# ============================================================================= +# Enable automatic deskewing (rotation correction) +ENABLE_DESKEW=true + +# Enable noise reduction +ENABLE_DENOISE=true + +# Target DPI for image processing +TARGET_DPI=300 + +# ============================================================================= +# CACHING (Optional) +# ============================================================================= +ENABLE_CACHE=false +CACHE_TTL_SECONDS=3600 +# REDIS_URL=redis://localhost:6379/0 + +# ============================================================================= +# SECURITY (Optional) +# ============================================================================= +# Enable authentication +ENABLE_AUTH=false + +# JWT configuration (if authentication is enabled) +# JWT_SECRET_KEY=your-secret-key-here +# JWT_ALGORITHM=HS256 + +# Simple API key authentication (alternative to JWT) +# API_KEY=your-api-key-here + +# 
============================================================================= +# LOGGING +# ============================================================================= +LOG_LEVEL=INFO +LOG_FORMAT=json + +# ============================================================================= +# METRICS (Optional) +# ============================================================================= +ENABLE_METRICS=true +METRICS_PORT=9090 diff --git a/services/ocr-service/pytest.ini b/services/ocr-service/pytest.ini new file mode 100644 index 0000000..a7ca7b6 --- /dev/null +++ b/services/ocr-service/pytest.ini @@ -0,0 +1,43 @@ +[pytest] +# Test discovery +testpaths = tests +python_files = test_*.py +python_classes = Test* +python_functions = test_* + +# Markers +markers = + slow: marks tests as slow (deselect with '-m "not slow"') + integration: marks tests as integration tests (deselect with '-m "not integration"') + gpu: marks tests that require GPU + +# Async mode +asyncio_mode = auto + +# Logging +log_cli = true +log_cli_level = INFO +log_cli_format = %(asctime)s [%(levelname)8s] %(name)s: %(message)s +log_cli_date_format = %Y-%m-%d %H:%M:%S + +# Coverage +addopts = + -v + --tb=short + --strict-markers + -ra + +# Ignore warnings +filterwarnings = + ignore::DeprecationWarning + ignore::PendingDeprecationWarning + ignore::UserWarning + +# Timeout for tests (in seconds) +timeout = 120 + +# Parallel execution (requires pytest-xdist) +# addopts = -n auto + +# Minimum version +minversion = 8.0 diff --git a/services/ocr-service/requirements.txt b/services/ocr-service/requirements.txt index 0164c92..162aa5b 100644 --- a/services/ocr-service/requirements.txt +++ b/services/ocr-service/requirements.txt @@ -5,3 +5,4 @@ pytesseract>=0.3.10 Pillow>=10.0.0 opencv-python-headless>=4.8.0 numpy>=1.24.0 +pydantic-settings>=2.0.0 diff --git a/services/ocr-service/tests/__init__.py b/services/ocr-service/tests/__init__.py new file mode 100644 index 0000000..bd4bff8 --- /dev/null +++ 
b/services/ocr-service/tests/__init__.py @@ -0,0 +1,8 @@ +""" +OCR Service Test Suite + +This package contains tests for the OCR service components: +- Unit tests for preprocessing, postprocessing, and engine modules +- Integration tests for the complete OCR pipeline +- API endpoint tests +""" diff --git a/services/ocr-service/tests/conftest.py b/services/ocr-service/tests/conftest.py new file mode 100644 index 0000000..28920e0 --- /dev/null +++ b/services/ocr-service/tests/conftest.py @@ -0,0 +1,400 @@ +""" +Pytest Configuration and Fixtures + +Shared fixtures for OCR service tests. +""" + +import io +import pytest +import numpy as np +from PIL import Image +from unittest.mock import MagicMock, patch + + +# ============================================================================= +# Image Fixtures +# ============================================================================= + +@pytest.fixture +def sample_image(): + """Create a sample test image (BGR format for OpenCV).""" + # Create a white image with some dark regions simulating text + img = np.ones((480, 640, 3), dtype=np.uint8) * 255 + + # Add dark regions to simulate text lines + img[50:80, 50:300] = 0 # Header line + img[100:130, 50:350] = 0 # Title line + img[150:175, 50:250] = 0 # Metadata line 1 + img[180:205, 50:200] = 0 # Metadata line 2 + + # Question 1 + img[240:265, 50:400] = 0 # Question text + img[280:300, 70:150] = 0 # Option A + img[310:330, 70:160] = 0 # Option B + img[340:360, 70:155] = 0 # Option C + img[370:390, 70:165] = 0 # Option D + + return img + + +@pytest.fixture +def sample_grayscale_image(): + """Create a sample grayscale test image.""" + img = np.ones((480, 640), dtype=np.uint8) * 255 + img[50:100, 50:300] = 0 + img[150:200, 50:350] = 0 + return img + + +@pytest.fixture +def sample_pil_image(): + """Create a sample PIL Image.""" + img = Image.new("RGB", (640, 480), color="white") + return img + + +@pytest.fixture +def sample_image_bytes(sample_pil_image): + """Create sample 
image as PNG bytes.""" + buffer = io.BytesIO() + sample_pil_image.save(buffer, format="PNG") + buffer.seek(0) + return buffer.getvalue() + + +@pytest.fixture +def sample_jpeg_bytes(sample_pil_image): + """Create sample image as JPEG bytes.""" + buffer = io.BytesIO() + sample_pil_image.save(buffer, format="JPEG") + buffer.seek(0) + return buffer.getvalue() + + +@pytest.fixture +def large_image(): + """Create a large test image for resize testing.""" + return np.ones((5000, 5000, 3), dtype=np.uint8) * 255 + + +@pytest.fixture +def small_image(): + """Create a small test image.""" + return np.ones((100, 100, 3), dtype=np.uint8) * 255 + + +# ============================================================================= +# Text Block Fixtures +# ============================================================================= + +@pytest.fixture +def sample_text_blocks(): + """Sample text blocks for postprocessing tests.""" + from app.ocr.postprocessing import TextBlock + + return [ + TextBlock( + text="PROVA DE MATEMÁTICA", + confidence=0.95, + bbox=(50, 20, 300, 50), + page_index=0, + line_index=0 + ), + TextBlock( + text="Ano Letivo 2024/2025", + confidence=0.92, + bbox=(50, 60, 250, 90), + page_index=0, + line_index=1 + ), + TextBlock( + text="1º Trimestre", + confidence=0.90, + bbox=(50, 100, 200, 130), + page_index=0, + line_index=2 + ), + TextBlock( + text="Duração: 120 minutos", + confidence=0.88, + bbox=(50, 140, 250, 170), + page_index=0, + line_index=3 + ), + TextBlock( + text="Versão A", + confidence=0.94, + bbox=(450, 100, 520, 130), + page_index=0, + line_index=4 + ), + TextBlock( + text="1. 
Calcule o valor de x na equação 2x + 5 = 15:", + confidence=0.94, + bbox=(50, 200, 450, 230), + page_index=0, + line_index=5 + ), + TextBlock( + text="A) 5", + confidence=0.91, + bbox=(70, 240, 120, 270), + page_index=0, + line_index=6 + ), + TextBlock( + text="B) 10", + confidence=0.92, + bbox=(70, 280, 130, 310), + page_index=0, + line_index=7 + ), + TextBlock( + text="C) 15", + confidence=0.93, + bbox=(70, 320, 130, 350), + page_index=0, + line_index=8 + ), + TextBlock( + text="D) 20", + confidence=0.90, + bbox=(70, 360, 130, 390), + page_index=0, + line_index=9 + ), + TextBlock( + text="(5 pontos)", + confidence=0.87, + bbox=(460, 200, 540, 230), + page_index=0, + line_index=10 + ), + TextBlock( + text="2. Justifique por que o triângulo ABC é isósceles:", + confidence=0.89, + bbox=(50, 420, 400, 450), + page_index=0, + line_index=11 + ), + ] + + +@pytest.fixture +def english_text_blocks(): + """Sample English text blocks.""" + from app.ocr.postprocessing import TextBlock + + return [ + TextBlock( + text="MATHEMATICS EXAM", + confidence=0.95, + bbox=(50, 20, 300, 50), + page_index=0, + line_index=0 + ), + TextBlock( + text="School Year 2024/2025", + confidence=0.92, + bbox=(50, 60, 250, 90), + page_index=0, + line_index=1 + ), + TextBlock( + text="1. 
Calculate the value of x:", + confidence=0.94, + bbox=(50, 200, 350, 230), + page_index=0, + line_index=2 + ), + ] + + +# ============================================================================= +# Mock Fixtures +# ============================================================================= + +@pytest.fixture +def mock_paddle_ocr(): + """Mock PaddleOCR class.""" + with patch("app.ocr.engine.PaddleOCR") as mock_cls: + mock_instance = MagicMock() + mock_cls.return_value = mock_instance + + # Mock OCR result format + mock_instance.ocr.return_value = [ + [ + [[[10, 10], [200, 10], [200, 40], [10, 40]], ("PROVA DE MATEMÁTICA", 0.95)], + [[[10, 50], [250, 50], [250, 80], [10, 80]], ("Ano Letivo 2024/2025", 0.92)], + [[[10, 100], [200, 100], [200, 130], [10, 130]], ("1º Trimestre", 0.90)], + [[[10, 150], [300, 150], [300, 180], [10, 180]], ("1. Calcule o valor de x:", 0.95)], + [[[30, 200], [100, 200], [100, 230], [30, 230]], ("A) 5", 0.92)], + [[[30, 240], [110, 240], [110, 270], [30, 270]], ("B) 10", 0.91)], + [[[30, 280], [110, 280], [110, 310], [30, 310]], ("C) 15", 0.93)], + [[[30, 320], [110, 320], [110, 350], [30, 350]], ("D) 20", 0.90)], + ] + ] + + yield mock_cls + + +@pytest.fixture +def mock_paddle_ocr_empty(): + """Mock PaddleOCR with empty results.""" + with patch("app.ocr.engine.PaddleOCR") as mock_cls: + mock_instance = MagicMock() + mock_cls.return_value = mock_instance + mock_instance.ocr.return_value = [[]] + yield mock_cls + + +@pytest.fixture +def mock_paddle_ocr_error(): + """Mock PaddleOCR that raises an error.""" + with patch("app.ocr.engine.PaddleOCR") as mock_cls: + mock_instance = MagicMock() + mock_cls.return_value = mock_instance + mock_instance.ocr.side_effect = Exception("OCR processing failed") + yield mock_cls + + +# ============================================================================= +# PDF Fixtures +# ============================================================================= + +@pytest.fixture +def sample_pdf_content(): 
+ """Create minimal valid PDF content for testing.""" + # This is a minimal valid PDF structure + pdf_content = b"""%PDF-1.4 +1 0 obj +<< /Type /Catalog /Pages 2 0 R >> +endobj +2 0 obj +<< /Type /Pages /Kids [3 0 R] /Count 1 >> +endobj +3 0 obj +<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >> +endobj +xref +0 4 +0000000000 65535 f +0000000009 00000 n +0000000058 00000 n +0000000115 00000 n +trailer +<< /Size 4 /Root 1 0 R >> +startxref +196 +%%EOF""" + return pdf_content + + +@pytest.fixture +def invalid_pdf_content(): + """Create invalid PDF content for testing.""" + return b"This is not a valid PDF file" + + +# ============================================================================= +# Configuration Fixtures +# ============================================================================= + +@pytest.fixture +def mock_settings(): + """Mock settings for testing.""" + with patch("app.config.settings") as mock: + mock.service_name = "ocr-service" + mock.service_version = "1.0.0" + mock.environment = "test" + mock.debug = True + mock.host = "0.0.0.0" + mock.port = 8000 + mock.workers = 1 + mock.ocr_lang = "pt" + mock.ocr_use_gpu = False + mock.ocr_use_angle_cls = True + mock.ocr_show_log = False + mock.max_image_size_mb = 20.0 + mock.max_images_per_request = 10 + mock.min_confidence_threshold = 0.5 + mock.enable_deskew = True + mock.enable_denoise = True + mock.target_dpi = 300 + mock.log_level = "INFO" + mock.log_format = "json" + mock.enable_metrics = False + yield mock + + +# ============================================================================= +# API Client Fixtures +# ============================================================================= + +@pytest.fixture +def test_client(): + """Create FastAPI test client.""" + from fastapi.testclient import TestClient + from app.main import app + + with TestClient(app) as client: + yield client + + +@pytest.fixture +def async_client(): + """Create async test client.""" + import httpx + from app.main 
import app + + return httpx.AsyncClient(app=app, base_url="http://test") + + +# ============================================================================= +# Pytest Configuration +# ============================================================================= + +def pytest_configure(config): + """Configure pytest markers.""" + config.addinivalue_line( + "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')" + ) + config.addinivalue_line( + "markers", "integration: marks tests as integration tests" + ) + config.addinivalue_line( + "markers", "gpu: marks tests that require GPU" + ) + + +def pytest_collection_modifyitems(config, items): + """Modify test collection based on markers.""" + # Skip slow tests unless explicitly requested + if not config.getoption("--runslow", default=False): + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) + + # Skip integration tests unless explicitly requested + if not config.getoption("--runintegration", default=False): + skip_integration = pytest.mark.skip(reason="need --runintegration option to run") + for item in items: + if "integration" in item.keywords: + item.add_marker(skip_integration) + + +def pytest_addoption(parser): + """Add custom command line options.""" + parser.addoption( + "--runslow", + action="store_true", + default=False, + help="run slow tests" + ) + parser.addoption( + "--runintegration", + action="store_true", + default=False, + help="run integration tests" + ) diff --git a/services/ocr-service/tests/test_engine.py b/services/ocr-service/tests/test_engine.py index 9f6f0b9..45ef52f 100644 --- a/services/ocr-service/tests/test_engine.py +++ b/services/ocr-service/tests/test_engine.py @@ -1 +1,655 @@ -# Tests for OCR engine +""" +OCR Engine Test Suite + +Comprehensive tests for the OCR processing engine. 
+""" + +import pytest +import numpy as np +from unittest.mock import Mock, patch, MagicMock +from PIL import Image +import io + +from app.ocr.engine import ( + OCREngine, + OCRResult, + DocumentInfo, + get_engine, + initialize_engine, + shutdown_engine, +) +from app.ocr.preprocessing import ( + ImagePreprocessor, + PreprocessingResult, + load_image_from_bytes, + load_image_from_pil, + image_to_bytes, +) +from app.ocr.postprocessing import ( + OCRPostprocessor, + TextBlock, + ExtractedMetadata, + ExtractedQuestion, + ExtractedOption, + QuestionType, + MetadataField, + normalize_text, + detect_language, +) + + +# ============================================================================= +# Fixtures +# ============================================================================= + +@pytest.fixture +def sample_image(): + """Create a sample test image.""" + # Create a simple white image with some text-like patterns + img = np.ones((480, 640, 3), dtype=np.uint8) * 255 + # Add some dark regions to simulate text + img[50:100, 50:200] = 0 + img[150:180, 50:300] = 0 + img[200:230, 50:250] = 0 + return img + + +@pytest.fixture +def sample_pil_image(): + """Create a sample PIL Image.""" + img = Image.new("RGB", (640, 480), color="white") + return img + + +@pytest.fixture +def sample_image_bytes(sample_pil_image): + """Create sample image as bytes.""" + buffer = io.BytesIO() + sample_pil_image.save(buffer, format="PNG") + buffer.seek(0) + return buffer.getvalue() + + +@pytest.fixture +def mock_paddle_ocr(): + """Mock PaddleOCR class.""" + with patch("app.ocr.engine.PaddleOCR") as mock_cls: + mock_instance = MagicMock() + mock_cls.return_value = mock_instance + + # Mock OCR result format + mock_instance.ocr.return_value = [ + [ + [[[10, 10], [200, 10], [200, 40], [10, 40]], ("1. 
Calcule o valor de x:", 0.95)], + [[[10, 50], [300, 50], [300, 80], [10, 80]], ("A) 5", 0.92)], + [[[10, 90], [300, 90], [300, 120], [10, 120]], ("B) 10", 0.91)], + [[[10, 130], [300, 130], [300, 160], [10, 160]], ("C) 15", 0.93)], + [[[10, 170], [300, 170], [300, 200], [10, 200]], ("D) 20", 0.90)], + ] + ] + + yield mock_cls + + +@pytest.fixture +def text_blocks(): + """Sample text blocks for postprocessing tests.""" + return [ + TextBlock(text="PROVA DE MATEMÁTICA", confidence=0.95, bbox=(50, 20, 300, 50), page_index=0, line_index=0), + TextBlock(text="Ano Letivo 2024/2025", confidence=0.92, bbox=(50, 60, 250, 90), page_index=0, line_index=1), + TextBlock(text="1º Trimestre", confidence=0.90, bbox=(50, 100, 200, 130), page_index=0, line_index=2), + TextBlock(text="Duração: 120 minutos", confidence=0.88, bbox=(50, 140, 250, 170), page_index=0, line_index=3), + TextBlock(text="1. Calcule o valor de x na equação:", confidence=0.94, bbox=(50, 200, 400, 230), page_index=0, line_index=4), + TextBlock(text="A) 5", confidence=0.91, bbox=(70, 240, 120, 270), page_index=0, line_index=5), + TextBlock(text="B) 10", confidence=0.92, bbox=(70, 280, 130, 310), page_index=0, line_index=6), + TextBlock(text="C) 15", confidence=0.93, bbox=(70, 320, 130, 350), page_index=0, line_index=7), + TextBlock(text="D) 20", confidence=0.90, bbox=(70, 360, 130, 390), page_index=0, line_index=8), + TextBlock(text="2. 
Justifique sua resposta:", confidence=0.89, bbox=(50, 420, 350, 450), page_index=0, line_index=9), + ] + + +# ============================================================================= +# Preprocessing Tests +# ============================================================================= + +class TestImagePreprocessor: + """Tests for ImagePreprocessor class.""" + + def test_init(self): + """Test preprocessor initialization.""" + preprocessor = ImagePreprocessor( + enable_deskew=True, + enable_denoise=True, + target_dpi=300, + ) + assert preprocessor.enable_deskew is True + assert preprocessor.enable_denoise is True + assert preprocessor.target_dpi == 300 + + def test_preprocess_grayscale_conversion(self, sample_image): + """Test that preprocessing converts to grayscale and back.""" + preprocessor = ImagePreprocessor(enable_deskew=False, enable_denoise=False) + result = preprocessor.preprocess(sample_image) + + assert isinstance(result, PreprocessingResult) + assert result.image.shape[2] == 3 # Should be BGR + assert "grayscale_conversion" in result.preprocessing_applied + + def test_preprocess_with_denoise(self, sample_image): + """Test preprocessing with noise reduction enabled.""" + preprocessor = ImagePreprocessor(enable_deskew=False, enable_denoise=True) + result = preprocessor.preprocess(sample_image) + + assert "denoise" in result.preprocessing_applied + + def test_preprocess_with_deskew(self, sample_image): + """Test preprocessing with deskewing enabled.""" + preprocessor = ImagePreprocessor(enable_deskew=True, enable_denoise=False) + result = preprocessor.preprocess(sample_image) + + # Deskew might not always find lines to correct + assert isinstance(result.rotation_angle, float) + + def test_preprocess_calculates_hash(self, sample_image): + """Test that preprocessing calculates image hash.""" + preprocessor = ImagePreprocessor() + result = preprocessor.preprocess(sample_image) + + assert result.image_hash is not None + assert len(result.image_hash) == 
64 # SHA-256 hex + + def test_resize_if_needed_large_image(self): + """Test resizing large images.""" + large_image = np.zeros((5000, 5000, 3), dtype=np.uint8) + resized, scale = ImagePreprocessor.resize_if_needed(large_image, max_dimension=4096) + + assert max(resized.shape[:2]) <= 4096 + assert scale < 1.0 + + def test_resize_if_needed_small_image(self): + """Test that small images are not resized unnecessarily.""" + small_image = np.zeros((500, 500, 3), dtype=np.uint8) + resized, scale = ImagePreprocessor.resize_if_needed(small_image) + + assert scale == 1.0 + assert resized.shape == small_image.shape + + +class TestLoadImageFunctions: + """Tests for image loading functions.""" + + def test_load_image_from_bytes(self, sample_image_bytes): + """Test loading image from bytes.""" + image = load_image_from_bytes(sample_image_bytes) + + assert isinstance(image, np.ndarray) + assert len(image.shape) == 3 + assert image.shape[2] == 3 # BGR + + def test_load_image_from_bytes_invalid(self): + """Test loading invalid bytes raises ValueError.""" + with pytest.raises(ValueError, match="Failed to decode"): + load_image_from_bytes(b"invalid image data") + + def test_load_image_from_pil(self, sample_pil_image): + """Test loading image from PIL Image.""" + image = load_image_from_pil(sample_pil_image) + + assert isinstance(image, np.ndarray) + assert image.shape == (480, 640, 3) + + def test_image_to_bytes(self, sample_image): + """Test converting image to bytes.""" + img_bytes = image_to_bytes(sample_image, format="PNG") + + assert isinstance(img_bytes, bytes) + assert len(img_bytes) > 0 + # Verify it's a valid PNG + assert img_bytes[:8] == b'\x89PNG\r\n\x1a\n' + + +# ============================================================================= +# Postprocessing Tests +# ============================================================================= + +class TestOCRPostprocessor: + """Tests for OCRPostprocessor class.""" + + def test_init(self): + """Test postprocessor 
initialization.""" + postprocessor = OCRPostprocessor( + min_confidence_threshold=0.5, + low_confidence_threshold=0.8, + ) + assert postprocessor.min_confidence_threshold == 0.5 + assert postprocessor.low_confidence_threshold == 0.8 + + def test_process_extracts_metadata(self, text_blocks): + """Test that postprocessing extracts metadata.""" + postprocessor = OCRPostprocessor() + metadata, questions, unmapped, warnings = postprocessor.process(text_blocks) + + assert isinstance(metadata, ExtractedMetadata) + assert metadata.school_year.value == "2024/2025" + assert metadata.term.value is not None + + def test_process_extracts_questions(self, text_blocks): + """Test that postprocessing extracts questions.""" + postprocessor = OCRPostprocessor() + metadata, questions, unmapped, warnings = postprocessor.process(text_blocks) + + assert len(questions) >= 1 + assert isinstance(questions[0], ExtractedQuestion) + assert questions[0].number == 1 + + def test_process_detects_multiple_choice(self, text_blocks): + """Test detection of multiple choice questions.""" + postprocessor = OCRPostprocessor() + metadata, questions, unmapped, warnings = postprocessor.process(text_blocks) + + # First question should be multiple choice (has options A, B, C, D) + if len(questions) > 0: + first_question = questions[0] + if first_question.options: + assert first_question.question_type == QuestionType.MULTIPLE_CHOICE + + def test_process_generates_warnings(self, text_blocks): + """Test that warnings are generated for low confidence fields.""" + postprocessor = OCRPostprocessor(low_confidence_threshold=0.95) + metadata, questions, unmapped, warnings = postprocessor.process(text_blocks) + + # With threshold of 0.95, most fields should trigger warnings + assert isinstance(warnings, list) + + +class TestTextNormalization: + """Tests for text normalization functions.""" + + def test_normalize_text_removes_extra_whitespace(self): + """Test whitespace normalization.""" + text = "Hello world test" + 
normalized = normalize_text(text) + assert normalized == "Hello world test" + + def test_normalize_text_normalizes_quotes(self): + """Test quote normalization.""" + text = "“Hello” and ‘world’" + normalized = normalize_text(text) + assert '"' in normalized + assert "'" in normalized + + def test_normalize_text_normalizes_dashes(self): + """Test dash normalization.""" + text = "option–one—two−three" + normalized = normalize_text(text) + assert "–" not in normalized + assert "—" not in normalized + + +class TestLanguageDetection: + """Tests for language detection.""" + + def test_detect_portuguese(self): + """Test detection of Portuguese text.""" + text = "O aluno deve resolver as questões de matemática com atenção" + lang = detect_language(text) + assert lang == "pt" + + def test_detect_english(self): + """Test detection of English text.""" + text = "The student should solve the math questions carefully" + lang = detect_language(text) + assert lang == "en" + + def test_detect_empty_text(self): + """Test detection with empty text defaults to Portuguese.""" + lang = detect_language("") + assert lang == "pt" + + +class TestQuestionType: + """Tests for question type enum and inference.""" + + def test_question_type_values(self): + """Test QuestionType enum values.""" + assert QuestionType.MULTIPLE_CHOICE.value == "multiple_choice" + assert QuestionType.SHORT_ANSWER.value == "short_answer" + assert QuestionType.DEVELOPMENT.value == "development" + assert QuestionType.TRUE_FALSE.value == "true_false" + assert QuestionType.UNKNOWN.value == "unknown" + + +class TestExtractedQuestion: + """Tests for ExtractedQuestion dataclass.""" + + def test_question_confidence_calculation(self): + """Test overall confidence calculation.""" + question = ExtractedQuestion( + number=1, + text="Test question", + text_confidence=0.9, + question_type=QuestionType.SHORT_ANSWER, + question_type_confidence=0.85, + ) + + # Confidence should be average of text and type confidence + expected = (0.9 + 
0.85) / 2 + assert abs(question.confidence - expected) < 0.01 + + def test_question_with_options_confidence(self): + """Test confidence with options included.""" + question = ExtractedQuestion( + number=1, + text="Test question", + text_confidence=0.9, + question_type=QuestionType.MULTIPLE_CHOICE, + question_type_confidence=0.95, + options=[ + ExtractedOption("A", "Option 1", 0.88), + ExtractedOption("B", "Option 2", 0.92), + ], + ) + + # Should include option confidences + assert question.confidence > 0 + + def test_question_to_dict(self): + """Test conversion to dictionary.""" + question = ExtractedQuestion( + number=1, + text="What is 2+2?", + text_confidence=0.95, + question_type=QuestionType.SHORT_ANSWER, + question_type_confidence=0.9, + max_score=5.0, + max_score_confidence=0.85, + ) + + result = question.to_dict() + + assert result["number"] == 1 + assert result["text"]["value"] == "What is 2+2?" + assert result["questionType"]["value"] == "short_answer" + assert result["maxScore"]["value"] == 5.0 + + +# ============================================================================= +# OCR Engine Tests +# ============================================================================= + +class TestOCREngine: + """Tests for OCREngine class.""" + + def test_init(self): + """Test engine initialization.""" + engine = OCREngine( + lang="pt", + use_gpu=False, + use_angle_cls=True, + ) + + assert engine.lang == "pt" + assert engine.use_gpu is False + assert engine.use_angle_cls is True + assert engine._initialized is False + + def test_health_check_not_initialized(self): + """Test health check when not initialized.""" + engine = OCREngine() + health = engine.health_check() + + assert health["initialized"] is False + assert health["status"] == "not_initialized" + + @patch("app.ocr.engine.PaddleOCR") + def test_initialize(self, mock_paddle): + """Test engine initialization.""" + mock_paddle.return_value = MagicMock() + + engine = OCREngine() + engine.initialize() + + 
assert engine._initialized is True + mock_paddle.assert_called_once() + + @patch("app.ocr.engine.PaddleOCR") + def test_process_image(self, mock_paddle, sample_image): + """Test processing a single image.""" + # Setup mock + mock_instance = MagicMock() + mock_paddle.return_value = mock_instance + mock_instance.ocr.return_value = [ + [ + [[[10, 10], [200, 10], [200, 40], [10, 40]], ("Test text", 0.95)], + ] + ] + + engine = OCREngine() + result = engine.process_image(sample_image) + + assert isinstance(result, OCRResult) + assert result.status in ["success", "partial", "error"] + assert result.processing_time_ms >= 0 + + @patch("app.ocr.engine.PaddleOCR") + def test_process_image_with_error(self, mock_paddle, sample_image): + """Test error handling during processing.""" + mock_instance = MagicMock() + mock_paddle.return_value = mock_instance + mock_instance.ocr.side_effect = Exception("OCR failed") + + engine = OCREngine() + result = engine.process_image(sample_image) + + assert result.status == "error" + assert result.error_message is not None + + @patch("app.ocr.engine.PaddleOCR") + def test_process_bytes(self, mock_paddle, sample_image_bytes): + """Test processing image from bytes.""" + mock_instance = MagicMock() + mock_paddle.return_value = mock_instance + mock_instance.ocr.return_value = [[]] + + engine = OCREngine() + result = engine.process_bytes(sample_image_bytes) + + assert isinstance(result, OCRResult) + + def test_process_bytes_invalid(self): + """Test processing invalid bytes.""" + engine = OCREngine() + result = engine.process_bytes(b"invalid") + + assert result.status == "error" + assert "Failed to load image" in result.error_message + + @patch("app.ocr.engine.PaddleOCR") + def test_process_multiple_images(self, mock_paddle, sample_image): + """Test processing multiple images.""" + mock_instance = MagicMock() + mock_paddle.return_value = mock_instance + mock_instance.ocr.return_value = [ + [ + [[[10, 10], [200, 10], [200, 40], [10, 40]], ("Page 
content", 0.9)], + ] + ] + + engine = OCREngine() + images = [sample_image, sample_image] + result = engine.process_images(images) + + assert isinstance(result, OCRResult) + assert result.document.page_count == 2 + + @patch("app.ocr.engine.PaddleOCR") + def test_process_empty_images_list(self, mock_paddle): + """Test processing empty images list.""" + engine = OCREngine() + result = engine.process_images([]) + + assert result.status == "error" + assert "No images provided" in result.error_message + + def test_shutdown(self): + """Test engine shutdown.""" + engine = OCREngine() + engine.shutdown() + + assert engine._ocr is None + assert engine._initialized is False + + +class TestOCRResult: + """Tests for OCRResult dataclass.""" + + def test_to_dict(self): + """Test OCRResult to dictionary conversion.""" + result = OCRResult( + status="success", + request_id="req-test-123", + processing_time_ms=1500, + overall_confidence=0.85, + document=DocumentInfo( + page_count=2, + main_language="pt", + has_tables=False, + ), + metadata=ExtractedMetadata(), + questions=[], + unmapped_content=[], + warnings=[], + ) + + result_dict = result.to_dict() + + assert result_dict["status"] == "success" + assert result_dict["requestId"] == "req-test-123" + assert result_dict["processingTimeMs"] == 1500 + assert result_dict["overallConfidence"] == 0.85 + assert result_dict["document"]["pageCount"] == 2 + assert result_dict["document"]["mainLanguage"] == "pt" + + def test_to_dict_with_error(self): + """Test OCRResult with error message.""" + result = OCRResult( + status="error", + request_id="req-error-123", + processing_time_ms=100, + overall_confidence=0.0, + document=DocumentInfo(page_count=0, main_language="pt", has_tables=False), + metadata=ExtractedMetadata(), + questions=[], + unmapped_content=[], + warnings=[], + error_message="Processing failed", + ) + + result_dict = result.to_dict() + + assert result_dict["status"] == "error" + assert result_dict["errorMessage"] == "Processing 
failed" + + +# ============================================================================= +# Module-level Functions Tests +# ============================================================================= + +class TestModuleFunctions: + """Tests for module-level functions.""" + + @patch("app.ocr.engine._default_engine", None) + def test_get_engine_creates_instance(self): + """Test that get_engine creates a new instance.""" + engine = get_engine() + + assert isinstance(engine, OCREngine) + + @patch("app.ocr.engine._default_engine", None) + def test_get_engine_returns_same_instance(self): + """Test that get_engine returns the same instance.""" + engine1 = get_engine() + engine2 = get_engine() + + assert engine1 is engine2 + + @patch("app.ocr.engine.PaddleOCR") + @patch("app.ocr.engine._default_engine", None) + def test_initialize_engine(self, mock_paddle): + """Test initialize_engine function.""" + mock_paddle.return_value = MagicMock() + + initialize_engine() + + engine = get_engine() + assert engine._initialized is True + + @patch("app.ocr.engine._default_engine") + def test_shutdown_engine(self, mock_engine): + """Test shutdown_engine function.""" + mock_engine.shutdown = MagicMock() + + shutdown_engine() + + # Should call shutdown on the engine + # Note: actual behavior depends on global state + + +# ============================================================================= +# Integration Tests (marked for optional execution) +# ============================================================================= + +@pytest.mark.integration +class TestOCRIntegration: + """Integration tests that require actual PaddleOCR.""" + + @pytest.mark.slow + def test_real_ocr_processing(self, sample_image): + """Test with actual PaddleOCR (slow, requires models).""" + try: + from paddleocr import PaddleOCR + except ImportError: + pytest.skip("PaddleOCR not installed") + + engine = OCREngine(lang="en", use_gpu=False) + result = engine.process_image(sample_image) + + assert 
isinstance(result, OCRResult) + assert result.status in ["success", "partial", "error"] + + +# ============================================================================= +# Async Tests +# ============================================================================= + +@pytest.mark.asyncio +class TestAsyncOCR: + """Async tests for OCR engine.""" + + @patch("app.ocr.engine.PaddleOCR") + async def test_process_image_async(self, mock_paddle, sample_image): + """Test async image processing.""" + mock_instance = MagicMock() + mock_paddle.return_value = mock_instance + mock_instance.ocr.return_value = [[]] + + engine = OCREngine() + result = await engine.process_image_async(sample_image) + + assert isinstance(result, OCRResult) + + @patch("app.ocr.engine.PaddleOCR") + async def test_process_bytes_async(self, mock_paddle, sample_image_bytes): + """Test async bytes processing.""" + mock_instance = MagicMock() + mock_paddle.return_value = mock_instance + mock_instance.ocr.return_value = [[]] + + engine = OCREngine() + result = await engine.process_bytes_async(sample_image_bytes) + + assert isinstance(result, OCRResult) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/services/ocr-service/tests/test_routes.py b/services/ocr-service/tests/test_routes.py new file mode 100644 index 0000000..f8c5a2d --- /dev/null +++ b/services/ocr-service/tests/test_routes.py @@ -0,0 +1,395 @@ +""" +OCR Service API Routes Test Suite + +Tests for the FastAPI endpoints of the OCR service. 
+""" + +import pytest +from fastapi.testclient import TestClient +from unittest.mock import patch, MagicMock, AsyncMock +import io +from PIL import Image + +from app.main import app +from app.ocr.engine import OCRResult, DocumentInfo +from app.ocr.postprocessing import ExtractedMetadata, ExtractedQuestion, QuestionType + + +# ============================================================================= +# Fixtures +# ============================================================================= + +@pytest.fixture +def client(): + """Create a test client for the FastAPI app.""" + return TestClient(app) + + +@pytest.fixture +def sample_image_bytes(): + """Create a sample PNG image as bytes.""" + img = Image.new("RGB", (100, 100), color="white") + buffer = io.BytesIO() + img.save(buffer, format="PNG") + buffer.seek(0) + return buffer.getvalue() + + +@pytest.fixture +def sample_jpeg_bytes(): + """Create a sample JPEG image as bytes.""" + img = Image.new("RGB", (100, 100), color="white") + buffer = io.BytesIO() + img.save(buffer, format="JPEG") + buffer.seek(0) + return buffer.getvalue() + + +@pytest.fixture +def mock_ocr_result(): + """Create a mock OCR result.""" + return OCRResult( + status="success", + request_id="req-test-123", + processing_time_ms=1500, + overall_confidence=0.85, + document=DocumentInfo( + page_count=1, + main_language="pt", + has_tables=False, + ), + metadata=ExtractedMetadata(), + questions=[ + ExtractedQuestion( + number=1, + text="What is 2+2?", + text_confidence=0.95, + question_type=QuestionType.SHORT_ANSWER, + question_type_confidence=0.9, + ) + ], + unmapped_content=[], + warnings=[], + ) + + +# ============================================================================= +# Root and Health Endpoint Tests +# ============================================================================= + +class TestRootEndpoints: + """Tests for root and health endpoints.""" + + def test_root_endpoint(self, client): + """Test the root endpoint returns service 
info.""" + response = client.get("/") + + assert response.status_code == 200 + data = response.json() + assert "service" in data + assert "version" in data + assert data["status"] == "running" + + def test_health_endpoint(self, client): + """Test the health endpoint.""" + response = client.get("/health") + + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + assert "service" in data + assert "version" in data + + def test_ocr_health_endpoint(self, client): + """Test the OCR-specific health endpoint.""" + with patch("app.api.routes.get_ocr_engine") as mock_engine: + mock_instance = MagicMock() + mock_instance.health_check.return_value = { + "initialized": True, + "status": "healthy", + "message": "OCR engine is operational", + } + mock_engine.return_value = mock_instance + + response = client.get("/ocr/health") + + assert response.status_code == 200 + data = response.json() + assert data["initialized"] is True + + +# ============================================================================= +# OCR Extract Endpoint Tests +# ============================================================================= + +class TestExtractEndpoint: + """Tests for the OCR extract endpoints.""" + + @patch("app.api.routes.get_ocr_engine") + def test_extract_single_image(self, mock_get_engine, client, sample_image_bytes, mock_ocr_result): + """Test extracting text from a single image.""" + # Setup mock + mock_engine = MagicMock() + mock_engine.process_image_async = AsyncMock(return_value=mock_ocr_result) + mock_get_engine.return_value = mock_engine + + # Make request + files = {"images": ("test.png", sample_image_bytes, "image/png")} + response = client.post("/ocr/v1/extract", files=[("images", ("test.png", sample_image_bytes, "image/png"))]) + + assert response.status_code in [200, 207] + data = response.json() + assert "status" in data + assert "requestId" in data + + @patch("app.api.routes.get_ocr_engine") + def 
test_extract_jpeg_image(self, mock_get_engine, client, sample_jpeg_bytes, mock_ocr_result): + """Test extracting text from a JPEG image.""" + mock_engine = MagicMock() + mock_engine.process_image_async = AsyncMock(return_value=mock_ocr_result) + mock_get_engine.return_value = mock_engine + + response = client.post( + "/ocr/v1/extract", + files=[("images", ("test.jpg", sample_jpeg_bytes, "image/jpeg"))] + ) + + assert response.status_code in [200, 207] + + def test_extract_no_files(self, client): + """Test that extraction fails without files.""" + response = client.post("/ocr/v1/extract", files=[]) + + # Should fail with 422 (validation error) or 400 + assert response.status_code in [400, 422] + + def test_extract_invalid_file_type(self, client): + """Test that extraction rejects invalid file types.""" + invalid_content = b"This is not an image" + + response = client.post( + "/ocr/v1/extract", + files=[("images", ("test.txt", invalid_content, "text/plain"))] + ) + + assert response.status_code == 400 + data = response.json() + assert "Invalid file type" in data.get("detail", str(data)) + + +class TestSimpleExtractEndpoint: + """Tests for the simplified single-image extract endpoint.""" + + @patch("app.api.routes.get_ocr_engine") + def test_simple_extract(self, mock_get_engine, client, sample_image_bytes, mock_ocr_result): + """Test simple extraction with a single image.""" + mock_engine = MagicMock() + mock_engine.process_image_async = AsyncMock(return_value=mock_ocr_result) + mock_get_engine.return_value = mock_engine + + response = client.post( + "/ocr/v1/extract/simple", + files={"image": ("test.png", sample_image_bytes, "image/png")} + ) + + assert response.status_code in [200, 207] + data = response.json() + assert "status" in data + + def test_simple_extract_pdf_rejected(self, client): + """Test that PDF files are rejected in simple mode.""" + pdf_content = b"%PDF-1.4 fake pdf content" + + response = client.post( + "/ocr/v1/extract/simple", + files={"image": 
("test.pdf", pdf_content, "application/pdf")} + ) + + assert response.status_code == 400 + + +# ============================================================================= +# Supported Languages Endpoint Tests +# ============================================================================= + +class TestSupportedLanguagesEndpoint: + """Tests for the supported languages endpoint.""" + + def test_get_supported_languages(self, client): + """Test retrieving supported OCR languages.""" + response = client.get("/ocr/v1/supported-languages") + + assert response.status_code == 200 + data = response.json() + assert "languages" in data + assert "default" in data + assert isinstance(data["languages"], list) + assert len(data["languages"]) > 0 + + # Check that Portuguese is included + pt_lang = next((l for l in data["languages"] if l["code"] == "pt"), None) + assert pt_lang is not None + assert pt_lang["primary"] is True + + +# ============================================================================= +# Status Endpoint Tests +# ============================================================================= + +class TestStatusEndpoint: + """Tests for the request status endpoint.""" + + def test_get_status_not_implemented(self, client): + """Test that status endpoint returns 501 (not implemented).""" + response = client.get("/ocr/v1/status/req-test-123") + + assert response.status_code == 501 + data = response.json() + assert "requestId" in data + assert data["status"] == "unknown" + + +# ============================================================================= +# File Validation Tests +# ============================================================================= + +class TestFileValidation: + """Tests for file validation logic.""" + + def test_valid_extensions(self, client, sample_image_bytes): + """Test that valid extensions are accepted.""" + valid_extensions = [ + ("test.jpg", "image/jpeg"), + ("test.jpeg", "image/jpeg"), + ("test.png", "image/png"), + ("test.webp", 
"image/webp"), + ("test.bmp", "image/bmp"), + ] + + with patch("app.api.routes.get_ocr_engine") as mock_get_engine: + mock_engine = MagicMock() + mock_engine.process_image_async = AsyncMock(return_value=MagicMock( + status="success", + to_dict=lambda: {"status": "success", "requestId": "test"} + )) + mock_get_engine.return_value = mock_engine + + for filename, content_type in valid_extensions: + response = client.post( + "/ocr/v1/extract", + files=[("images", (filename, sample_image_bytes, content_type))] + ) + # Should not return 400 for invalid file type + assert response.status_code != 400 or "Invalid file type" not in response.text + + def test_invalid_extensions(self, client): + """Test that invalid extensions are rejected.""" + invalid_files = [ + ("test.txt", b"text content", "text/plain"), + ("test.doc", b"doc content", "application/msword"), + ("test.exe", b"exe content", "application/octet-stream"), + ("test.html", b"", "text/html"), + ] + + for filename, content, content_type in invalid_files: + response = client.post( + "/ocr/v1/extract", + files=[("images", (filename, content, content_type))] + ) + assert response.status_code == 400 + + +# ============================================================================= +# Error Handling Tests +# ============================================================================= + +class TestErrorHandling: + """Tests for error handling in API routes.""" + + @patch("app.api.routes.get_ocr_engine") + def test_ocr_processing_error(self, mock_get_engine, client, sample_image_bytes): + """Test handling of OCR processing errors.""" + mock_engine = MagicMock() + mock_engine.process_image_async = AsyncMock(side_effect=Exception("OCR failed")) + mock_get_engine.return_value = mock_engine + + response = client.post( + "/ocr/v1/extract", + files=[("images", ("test.png", sample_image_bytes, "image/png"))] + ) + + assert response.status_code == 500 + + @patch("app.api.routes.get_ocr_engine") + def 
test_invalid_image_content(self, mock_get_engine, client): + """Test handling of invalid image content.""" + mock_engine = MagicMock() + mock_engine.process_image_async = AsyncMock( + side_effect=ValueError("Failed to decode image") + ) + mock_get_engine.return_value = mock_engine + + # Send garbage data with valid extension + response = client.post( + "/ocr/v1/extract", + files=[("images", ("test.png", b"not an image", "image/png"))] + ) + + # Should return error status + assert response.status_code in [400, 500] + + +# ============================================================================= +# Response Format Tests +# ============================================================================= + +class TestResponseFormat: + """Tests for response format correctness.""" + + @patch("app.api.routes.get_ocr_engine") + def test_success_response_format(self, mock_get_engine, client, sample_image_bytes, mock_ocr_result): + """Test that success response has correct format.""" + mock_engine = MagicMock() + mock_engine.process_image_async = AsyncMock(return_value=mock_ocr_result) + mock_get_engine.return_value = mock_engine + + response = client.post( + "/ocr/v1/extract", + files=[("images", ("test.png", sample_image_bytes, "image/png"))] + ) + + assert response.status_code in [200, 207] + data = response.json() + + # Check required fields + assert "status" in data + assert "requestId" in data + assert "processingTimeMs" in data + assert "overallConfidence" in data + assert "document" in data + assert "metadata" in data + assert "questions" in data + assert "warnings" in data + + @patch("app.api.routes.get_ocr_engine") + def test_document_info_format(self, mock_get_engine, client, sample_image_bytes, mock_ocr_result): + """Test that document info has correct format.""" + mock_engine = MagicMock() + mock_engine.process_image_async = AsyncMock(return_value=mock_ocr_result) + mock_get_engine.return_value = mock_engine + + response = client.post( + "/ocr/v1/extract", + 
files=[("images", ("test.png", sample_image_bytes, "image/png"))] + ) + + data = response.json() + document = data.get("document", {}) + + assert "pageCount" in document + assert "mainLanguage" in document + assert "hasTables" in document + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])