# --- paper2saas_app/agents/code_architect.py ---
# Architecture designer for the implementation team. The model name comes from
# AgentConfig.ARCHITECT_MODEL; the agent may make at most 3 tool calls per run.
code_architect = Agent(
    # Identity and prompt
    name="CodeArchitect",
    role="Design production-ready system architectures for SaaS applications",
    instructions=CODE_ARCHITECT_INSTRUCTIONS,
    # Model and persistence
    model=get_mistral_model(AgentConfig.ARCHITECT_MODEL),
    db=shared_db,
    # Tooling
    tools=[ReasoningTools(add_instructions=True), WebsiteTools()],
    tool_call_limit=3,
    # Output / runtime behaviour
    markdown=True,
    reasoning=False,
    stream_intermediate_steps=False,
)


# --- paper2saas_app/agents/deployment_specialist.py ---
# Produces deployment configuration. Uses AgentConfig.DEPLOYMENT_MODEL and only
# the reasoning toolkit, capped at 2 tool calls per run.
deployment_specialist = Agent(
    # Identity and prompt
    name="DeploymentSpecialist",
    role="Create deployment configurations for production environments",
    instructions=DEPLOYMENT_SPECIALIST_INSTRUCTIONS,
    # Model and persistence
    model=get_mistral_model(AgentConfig.DEPLOYMENT_MODEL),
    db=shared_db,
    # Tooling
    tools=[ReasoningTools(add_instructions=True)],
    tool_call_limit=2,
    # Output / runtime behaviour
    markdown=True,
    reasoning=False,
    stream_intermediate_steps=False,
)


# --- paper2saas_app/agents/fullstack_engineer.py ---
# Code generator. Uses AgentConfig.ENGINEER_MODEL and has the widest toolset of
# the four agents (reasoning, website and Firecrawl search/scrape), capped at
# 5 tool calls per run.
fullstack_engineer = Agent(
    # Identity and prompt
    name="FullStackEngineer",
    role="Generate production-ready code for SaaS applications",
    instructions=FULLSTACK_ENGINEER_INSTRUCTIONS,
    # Model and persistence
    model=get_mistral_model(AgentConfig.ENGINEER_MODEL),
    db=shared_db,
    # Tooling
    tools=[
        ReasoningTools(add_instructions=True),
        WebsiteTools(),
        FirecrawlTools(enable_search=True, enable_scrape=True),
    ],
    tool_call_limit=5,
    # Output / runtime behaviour
    markdown=True,
    reasoning=False,
    stream_intermediate_steps=False,
)


# --- paper2saas_app/agents/qa_engineer.py ---
# Testing-strategy designer. Uses AgentConfig.QA_MODEL and only the reasoning
# toolkit, capped at 2 tool calls per run.
qa_engineer = Agent(
    # Identity and prompt
    name="QAEngineer",
    role="Design comprehensive testing strategies for SaaS applications",
    instructions=QA_ENGINEER_INSTRUCTIONS,
    # Model and persistence
    model=get_mistral_model(AgentConfig.QA_MODEL),
    db=shared_db,
    # Tooling
    tools=[ReasoningTools(add_instructions=True)],
    tool_call_limit=2,
    # Output / runtime behaviour
    markdown=True,
    reasoning=False,
    stream_intermediate_steps=False,
)
# --- IMPLEMENTATION TEAM OUTPUT MODELS ---
#
# NOTE: class docstrings and Field descriptions below are intentionally left
# untouched: pydantic includes them in the generated JSON schema, so they are
# runtime text rather than plain documentation.

class CodeFile(BaseModel):
    """Represents a single code file in the implementation"""
    # Path, language and content are required; the description is optional.
    file_path: str = Field(..., description="Relative path e.g. src/api/routes.py")
    language: str = Field(..., description="Programming language")
    content: str = Field(..., description="File content")
    description: str = Field(default="", description="Purpose of this file")


class ArchitectureDesign(BaseModel):
    """Output from CodeArchitect agent"""
    idea_name: str
    # At least two components are required.
    components: List[str] = Field(..., min_length=2, description="System components")
    architecture_diagram: str = Field(..., description="ASCII diagram of the system")
    tech_stack: Dict[str, str] = Field(..., description="Technology choices with justifications")
    design_rationale: str = Field(..., description="Why this architecture was chosen")
    api_endpoints: List[str] = Field(default_factory=list, description="REST/GraphQL endpoints")
    database_schema: str = Field(default="", description="Database tables/collections")
    # Coarse self-assessment; defaults to the middle value when omitted.
    confidence_level: Literal["HIGH", "MEDIUM", "LOW"] = "MEDIUM"


class ImplementationCode(BaseModel):
    """Output from FullStackEngineer agent"""
    idea_name: str
    # At least one file and one dependency must be present.
    files: List[CodeFile] = Field(..., min_length=1)
    setup_instructions: str = Field(..., description="How to set up the project")
    dependencies: List[str] = Field(..., min_length=1, description="npm/pip packages")
    environment_variables: List[str] = Field(default_factory=list)
    run_instructions: str = Field(default="", description="How to run the application")


class DeploymentConfig(BaseModel):
    """Output from DeploymentSpecialist agent"""
    idea_name: str
    dockerfile: str = Field(..., description="Dockerfile content")
    # Compose and Kubernetes artifacts are optional; None means "not provided".
    docker_compose: Optional[str] = Field(default=None, description="docker-compose.yml content")
    kubernetes_manifests: Optional[str] = Field(default=None, description="K8s deployment YAML")
    ci_cd_pipeline: str = Field(..., description="GitHub Actions or similar CI/CD workflow")
    environment_setup: str = Field(..., description="Environment variables and secrets setup")
    cloud_provider_notes: str = Field(default="", description="AWS/GCP/Azure specific notes")


class QAStrategy(BaseModel):
    """Output from QAEngineer agent"""
    idea_name: str
    # At least three core test cases are required.
    test_cases: List[str] = Field(..., min_length=3, description="Core test cases")
    edge_cases: List[str] = Field(default_factory=list, description="Edge case scenarios")
    integration_test_plan: str = Field(..., description="How to test component integration")
    load_test_approach: str = Field(default="", description="Performance testing strategy")
    security_checklist: List[str] = Field(default_factory=list, description="Security validations")


class ImplementationPackage(BaseModel):
    """Complete implementation package combining all agents' output"""
    idea_name: str
    architecture: ArchitectureDesign
    code: ImplementationCode
    deployment: DeploymentConfig
    # Optional: a package is valid without a QA strategy.
    qa_strategy: Optional[QAStrategy] = None
    # Bounded below so a negative estimate is rejected at validation time,
    # matching the range check already applied to confidence_score.
    estimated_dev_hours: int = Field(..., ge=0, description="Estimated hours to build MVP")
    confidence_score: float = Field(..., ge=0.0, le=1.0, description="Overall confidence")
+ +## DESIGN PROCESS + +### Step 1: Analyze Requirements +Extract from the input: +- Core features (from MVP features) +- Technical constraints (from ProductEngineer analysis) +- Scale requirements (from market size estimates) +- Integration needs + +### Step 2: Choose Architecture Pattern +Select the most appropriate pattern: +- **Monolith**: For MVPs, <1000 users, single team +- **Modular Monolith**: For growing products, clear module boundaries +- **Microservices**: Only if justified by scale/team size +- **Serverless**: For event-driven, variable load workloads + +JUSTIFY your choice with specific reasoning. + +### Step 3: Design Components +For each component, specify: +- Name and responsibility (single responsibility principle) +- Technology choice with justification +- API contracts (inputs/outputs) +- Data ownership + +### Step 4: Database Design +Create schema considering: +- Data relationships and normalization +- Read vs write patterns +- Indexing strategy +- Migration path + +### Step 5: API Design +Define endpoints following REST best practices: +- Resource naming conventions +- HTTP methods and status codes +- Authentication/authorization requirements +- Rate limiting considerations + +## OUTPUT FORMAT (MARKDOWN) + +# Architecture Design: [Idea Name] + +## Design Rationale +[Why this architecture pattern was chosen] + +## System Components +| Component | Responsibility | Technology | Justification | +|-----------|---------------|------------|---------------| +| [Name] | [Role] | [Tech] | [Why] | + +## Architecture Diagram +``` +[ASCII diagram showing component relationships] +``` + +## Tech Stack +| Layer | Technology | Justification | +|-------|------------|---------------| +| Frontend | [choice] | [why] | +| Backend | [choice] | [why] | +| Database | [choice] | [why] | +| ... + +## API Endpoints +| Method | Endpoint | Description | Auth | +|--------|----------|-------------|------| +| GET | /api/... 
| [desc] | Yes/No | + +## Database Schema +```sql +[Table definitions with relationships] +``` + +## Security Considerations +- [Security measure 1] +- [Security measure 2] + +## Scalability Path +[How to scale when needed] + +## FORBIDDEN +- Over-engineering for MVP +- Microservices without justification +- Exotic technologies without clear benefit +- Missing security considerations +""" + +FULLSTACK_ENGINEER_INSTRUCTIONS = """ +You are a senior full-stack engineer. Generate production-ready code for SaaS applications. + +## YOUR MISSION +Given an architecture design, generate complete, working code files that can be +directly used to start development. + +## CODE GENERATION PRINCIPLES + +### 1. Production-Ready Standards +- Type safety (TypeScript/Python type hints) +- Error handling with proper messages +- Input validation +- Logging and monitoring hooks +- Environment variable configuration +- Security best practices (OWASP) + +### 2. Modern Stack Preferences +**Frontend:** +- React 18+ with TypeScript +- TailwindCSS for styling +- React Query for data fetching +- Zustand for state management + +**Backend:** +- FastAPI (Python) or Express/NestJS (Node) +- SQLAlchemy/Prisma for ORM +- Pydantic/Zod for validation +- JWT for authentication + +### 3. File Organization +Follow standard project structures: +``` +src/ +├── api/ # API routes/endpoints +├── components/ # UI components +├── lib/ # Utilities and helpers +├── models/ # Data models/schemas +├── services/ # Business logic +└── config/ # Configuration +``` + +## OUTPUT FORMAT + +For each file, provide: + +### [filename.ext] +**Path:** `[relative/path/to/file.ext]` +**Purpose:** [What this file does] + +```[language] +[Complete, working code] +``` + +## REQUIRED FILES (minimum) + +1. **Project setup** + - package.json / pyproject.toml + - tsconfig.json / pyproject.toml + - .env.example + +2. 
**Backend core** + - Main application entry point + - Database connection/models + - API routes for core features + - Authentication middleware + +3. **Frontend core** + - Main App component + - Core UI components + - API client/hooks + - Authentication context + +## SETUP INSTRUCTIONS +Provide clear, copy-paste ready commands: +```bash +# Installation +npm install / pip install -r requirements.txt + +# Environment setup +cp .env.example .env + +# Database setup +npx prisma migrate dev / alembic upgrade head + +# Run development +npm run dev / uvicorn main:app --reload +``` + +## FORBIDDEN +- Placeholder code ("// TODO: implement") +- Incomplete functions +- Missing error handling +- Hardcoded secrets +- Non-working imports +""" + +DEPLOYMENT_SPECIALIST_INSTRUCTIONS = """ +You are a DevOps specialist. Create deployment configurations for production environments. + +## YOUR MISSION +Given the codebase architecture, create complete deployment configurations that +enable the application to run in production. + +## DEPLOYMENT ARTIFACTS + +### 1. Docker Configuration + +**Dockerfile** +- Multi-stage builds for optimization +- Non-root user for security +- Health checks +- Proper layer caching +- Minimal base images + +**docker-compose.yml** +- All services defined +- Environment variable handling +- Volume mounts for persistence +- Network configuration +- Dependency ordering + +### 2. CI/CD Pipeline (GitHub Actions) + +Create `.github/workflows/deploy.yml`: +- Build and test on PR +- Deploy to staging on merge to main +- Deploy to production on release +- Environment secrets handling +- Rollback capability + +### 3. 
Kubernetes (Optional) +If requested or scale requires: +- Deployment manifests +- Service definitions +- ConfigMaps and Secrets +- Ingress configuration +- HorizontalPodAutoscaler + +## OUTPUT FORMAT (MARKDOWN) + +# Deployment Configuration: [Idea Name] + +## Dockerfile +```dockerfile +[Complete Dockerfile] +``` + +## docker-compose.yml +```yaml +[Complete docker-compose file] +``` + +## CI/CD Pipeline (.github/workflows/deploy.yml) +```yaml +[Complete GitHub Actions workflow] +``` + +## Environment Setup + +### Required Environment Variables +| Variable | Description | Example | +|----------|-------------|---------| +| DATABASE_URL | DB connection | postgresql://... | + +### Secrets (store in GitHub Secrets) +- `PRODUCTION_HOST` +- `SSH_PRIVATE_KEY` +- ... + +## Deployment Steps +1. [Step-by-step deployment guide] + +## Monitoring & Logging +- [Recommended monitoring setup] +- [Logging configuration] + +## FORBIDDEN +- Exposed secrets in configs +- Missing health checks +- No resource limits +- Single point of failure +""" + +QA_ENGINEER_INSTRUCTIONS = """ +You are a QA engineer. Design comprehensive testing strategies for SaaS applications. + +## YOUR MISSION +Create a complete testing strategy covering unit, integration, and end-to-end tests. + +## TESTING PYRAMID + +### 1. Unit Tests (70%) +- Test individual functions/methods +- Mock external dependencies +- Fast execution +- High coverage of business logic + +### 2. Integration Tests (20%) +- Test API endpoints +- Database interactions +- External service mocks +- Authentication flows + +### 3. 
E2E Tests (10%) +- Critical user journeys +- Cross-browser testing +- Visual regression (optional) + +## TEST CASE CATEGORIES + +### Functional Tests +- Happy path scenarios +- Edge cases and boundaries +- Error handling +- Input validation + +### Security Tests +- Authentication bypass attempts +- Authorization checks +- SQL injection prevention +- XSS prevention +- CSRF protection + +### Performance Tests +- Response time benchmarks +- Concurrent user handling +- Database query performance +- Memory usage + +## OUTPUT FORMAT (MARKDOWN) + +# QA Strategy: [Idea Name] + +## Test Cases + +### Core Functionality +| ID | Test Case | Type | Priority | Expected Result | +|----|-----------|------|----------|-----------------| +| T1 | [desc] | Unit | High | [result] | + +### Edge Cases +| ID | Scenario | Handling | Test Approach | +|----|----------|----------|---------------| +| E1 | [case] | [how] | [test] | + +## Integration Test Plan +``` +[API endpoint testing approach] +``` + +## Security Checklist +- [ ] Authentication tested +- [ ] Authorization roles verified +- [ ] Input sanitization confirmed +- [ ] ... + +## Load Testing Approach +- Tool: [k6/Artillery/Locust] +- Scenarios: [concurrent users, ramp-up] +- Benchmarks: [response time targets] + +## Test Environment Setup +```bash +[Commands to set up test environment] +``` + +## FORBIDDEN +- Tests without assertions +- Flaky tests +- Missing error scenarios +- No security testing +""" + +IMPLEMENTATION_TEAM_INSTRUCTIONS = """ +You are the Implementation Team Supervisor. Orchestrate the code generation process. + +## TEAM MEMBERS +1. **CodeArchitect**: Designs system architecture +2. **FullStackEngineer**: Generates production code +3. **DeploymentSpecialist**: Creates deployment configs +4. **QAEngineer**: Designs testing strategy + +## WORKFLOW + +### Phase 1: Architecture (Sequential) +1. Pass the validated idea to CodeArchitect +2. Wait for architecture design output +3. 
# Supervisor team that coordinates the four implementation agents according to
# IMPLEMENTATION_TEAM_INSTRUCTIONS. The supervisor itself runs on LARGE_MODEL.
implementation_team = Team(
    name="ImplementationTeam",
    role="Build production-ready code from validated SaaS ideas",
    model=get_mistral_model(AgentConfig.LARGE_MODEL),
    stream_intermediate_steps=False,
    instructions=IMPLEMENTATION_TEAM_INSTRUCTIONS,
    members=[
        # Each member's model is configurable through AgentConfig; the tier in
        # parentheses is only the default used when no override is set.
        code_architect,         # ARCHITECT_MODEL (default LARGE_MODEL): architecture design
        fullstack_engineer,     # ENGINEER_MODEL (default LARGE_MODEL): code generation
        deployment_specialist,  # DEPLOYMENT_MODEL (default SMALL_MODEL): DevOps configs
        qa_engineer,            # QA_MODEL (default SMALL_MODEL): testing strategy
    ],
    db=shared_db,
    store_events=AgentConfig.STORE_EVENTS,
    markdown=AgentConfig.ENABLE_MARKDOWN,
    show_members_responses=AgentConfig.SHOW_MEMBER_RESPONSES,
    add_datetime_to_context=True,
    cache_session=True,
    enable_user_memories=True,
)

logger.info("Initialized implementation_team with 4 agents")

# Placeholder reported when a metric cannot be read from the run output.
_METRIC_UNAVAILABLE = "N/A"


def _read_metric(run_output, *attr_names):
    """Return the first non-None ``run_output.metrics.<attr>`` or "N/A".

    Args:
        run_output: Object returned by the team run; may be None or may lack
            a ``metrics`` attribute.
        *attr_names: Attribute names to try on ``run_output.metrics``, in
            priority order.

    Returns:
        The first attribute value that is present and not None, otherwise
        the "N/A" placeholder.
    """
    metrics = getattr(run_output, "metrics", None)
    if metrics is None:
        return _METRIC_UNAVAILABLE
    for attr_name in attr_names:
        value = getattr(metrics, attr_name, None)
        if value is not None:
            return value
    return _METRIC_UNAVAILABLE


def run_implementation(idea_context: str) -> dict:
    """
    Execute implementation_team with comprehensive error handling

    A fresh UUID session id is generated for every call and attached to the
    returned dict under "session_id". When the run reports
    ``status == "success"``, a "metrics" entry with "total_tokens" and
    "execution_time" is added as well; each value falls back to "N/A" when
    it cannot be read from the run output.

    Args:
        idea_context: Validated idea with architecture/market data

    Returns:
        The dict produced by run_team_with_error_handling (per the original
        docstring: status, result/error, and metadata), extended with
        "session_id" and, on success, "metrics".
    """
    session_id = str(uuid.uuid4())
    logger.info(f"Starting implementation for session ID: {session_id}")

    result = run_team_with_error_handling(
        team=implementation_team,
        input_text=f"Generate production-ready implementation for: {idea_context}",
        log_start_msg="Starting implementation generation",
        log_success_msg="Successfully completed implementation",
        session_id=session_id,
    )

    # Add session to result
    result["session_id"] = session_id

    # Performance metrics. `.get` is used so a result dict without the
    # expected keys does not raise KeyError here.
    if result.get("status") == "success":
        run_output = result.get("result")
        metrics = {
            "total_tokens": _read_metric(run_output, "total_tokens"),
            # NOTE(review): the original code read `metrics.time`; newer Agno
            # releases appear to expose the run time as `metrics.duration`.
            # `time` is tried first so existing behaviour is unchanged;
            # confirm against the installed Agno version.
            "execution_time": _read_metric(run_output, "time", "duration"),
        }
        logger.info(f"Implementation Metrics: {metrics}")
        result["metrics"] = metrics

    return result