Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Files excluded from the root Docker build context.
# NOTE: a pattern without a leading "**/" is matched only against the root of
# the build context, so Python bytecode caches use the recursive "**/" form
# to be excluded at every directory depth.
.git
.gitignore
.venv
**/__pycache__
**/*.pyc
**/*.pyo
.mypy_cache
.ruff_cache
.pytest_cache
*.egg-info
dist
build
.env
.env.*
operator/
docs/
setup/
test_agent.py
test_agent_problem_sets.json
*.md
hack/
.github/
.idea/
Makefile
deploy/
evaluator/datasets/swebench_verified/repos/
67 changes: 67 additions & 0 deletions .github/workflows/build-images.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Builds the screener and operator container images.
# Pushes to main and v* tags publish to GHCR; pull requests against main only
# build the images (the login step is skipped and push is false).
name: Build Images

on:
  push:
    branches:
      - main
    tags:
      - "v*"
  pull_request:
    branches:
      - main

permissions:
  contents: read
  packages: write

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      # Let the other image finish building even if one matrix leg fails.
      fail-fast: false
      matrix:
        include:
          - image: screener
            dockerfile: validator/Dockerfile
            context: .
          - image: operator
            dockerfile: operator/Dockerfile
            context: operator

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GHCR
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract Docker metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}/${{ matrix.image }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: ${{ matrix.context }}
          file: ${{ matrix.dockerfile }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # Give each matrix image its own cache scope; with the default
          # scope both builds write to the same cache and overwrite each other.
          cache-from: type=gha,scope=${{ matrix.image }}
          cache-to: type=gha,mode=max,scope=${{ matrix.image }}
23 changes: 23 additions & 0 deletions api/endpoints/screener.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from fastapi import APIRouter, Query, HTTPException
from pydantic import BaseModel

from models.evaluation_set import EvaluationSetGroup
from queries.agent import get_queue_depth, get_active_evaluation_count

router = APIRouter()


class QueueDepthResponse(BaseModel):
    """Response body returned by the ``/queue-depth`` endpoint."""

    # Value returned by get_queue_depth() for the requested stage.
    depth: int
    # The ``stage`` query parameter, echoed back to the caller.
    stage: str
    # Value returned by get_active_evaluation_count() for the requested stage.
    active: int


@router.get("/queue-depth", response_model=QueueDepthResponse)
async def queue_depth(
    stage: str = Query(..., pattern="^(screener_1|screener_2|validator)$"),
):
    """Report the queue depth and active evaluation count for one stage.

    Args:
        stage: Required query parameter; the pattern only admits
            ``screener_1``, ``screener_2`` or ``validator``.

    Returns:
        A ``QueueDepthResponse`` carrying the requested stage together with
        the results of ``get_queue_depth`` and ``get_active_evaluation_count``
        for the matching ``EvaluationSetGroup``.
    """
    evaluation_group = EvaluationSetGroup(stage)
    # Awaited one after the other: queue depth first, then the active count.
    queued = await get_queue_depth(evaluation_group)
    running = await get_active_evaluation_count(evaluation_group)
    return QueueDepthResponse(stage=stage, depth=queued, active=running)
2 changes: 2 additions & 0 deletions api/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from api.endpoints.scoring import router as scoring_router
from api.endpoints.statistics import router as statistics_router
from api.endpoints.retrieval import router as retrieval_router
from api.endpoints.screener import router as screener_router



Expand Down Expand Up @@ -114,6 +115,7 @@ async def lifespan(app: FastAPI):
app.include_router(evaluation_run_router, prefix="/evaluation-run")
app.include_router(evaluations_router, prefix="/evaluation")
app.include_router(statistics_router, prefix="/statistics")
app.include_router(screener_router, prefix="/screener")


if __name__ == "__main__":
Expand Down
4 changes: 3 additions & 1 deletion evaluator/sandbox/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | b
ln -sf "$NVM_DIR/versions/node/$NODE_VERSION/bin/npm" /usr/local/bin/npm && \
ln -sf "$NVM_DIR/versions/node/$NODE_VERSION/bin/npx" /usr/local/bin/npx && \
ln -sf "$NVM_DIR/versions/node/$NODE_VERSION/bin/node" /usr/local/bin/node && \
ln -sf "$NVM_DIR/versions/node/$NODE_VERSION/lib/node_modules" /usr/local/lib/node_modules
ln -sf "$NVM_DIR/versions/node/$NODE_VERSION/lib/node_modules" /usr/local/lib/node_modules && \
chmod -R a+rX /root/.nvm && \
chmod a+rx /root

ENV NODE_PATH=/usr/local/lib/node_modules

Expand Down
7 changes: 6 additions & 1 deletion evaluator/sandbox/proxy/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
FROM nginx:alpine

USER root

RUN apk add --no-cache gettext
RUN mkdir -p /sandbox-proxy && chmod 0777 /sandbox-proxy

COPY nginx.conf.template /tmp/nginx.conf.template

CMD ["/bin/sh", "-c", "envsubst '${GATEWAY_URL} ${GATEWAY_HOST}' < /tmp/nginx.conf.template > /etc/nginx/nginx.conf && exec nginx -g 'daemon off;'"]
USER nginx

CMD ["/bin/sh", "-c", "envsubst '${GATEWAY_URL} ${GATEWAY_HOST}' < /tmp/nginx.conf.template > /sandbox-proxy/nginx.conf && exec nginx -c /sandbox-proxy/nginx.conf -g 'daemon off;'"]
56 changes: 41 additions & 15 deletions evaluator/sandbox/proxy/nginx.conf.template
Original file line number Diff line number Diff line change
@@ -1,26 +1,52 @@
pid /sandbox-proxy/nginx.pid;

events {}

http {
# Use a public DNS resolver for upstream name resolution
resolver 1.1.1.1 1.0.0.1 ipv6=off;

client_body_temp_path /sandbox-proxy/client_temp;
proxy_temp_path /sandbox-proxy/proxy_temp;
fastcgi_temp_path /sandbox-proxy/fastcgi_temp;
uwsgi_temp_path /sandbox-proxy/uwsgi_temp;
scgi_temp_path /sandbox-proxy/scgi_temp;

limit_req_zone $binary_remote_addr zone=agent:10m rate=30r/s;

server {
listen 80;
listen 8080;

proxy_set_header Host $GATEWAY_HOST;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;

proxy_connect_timeout 300s;
proxy_send_timeout 300s;
proxy_read_timeout 300s;

# Ensure TLS SNI to Cloudflare/origin matches the hostname
#proxy_ssl_server_name on;
#proxy_ssl_name $GATEWAY_HOST;

location /api/inference {
limit_req zone=agent burst=10 nodelay;
proxy_pass $GATEWAY_URL/api/inference;
}

location /api/embedding {
limit_req zone=agent burst=10 nodelay;
proxy_pass $GATEWAY_URL/api/embedding;
}

location /api/usage {
limit_req zone=agent burst=10 nodelay;
proxy_pass $GATEWAY_URL/api/usage;
}

location / {
proxy_pass $GATEWAY_URL;
proxy_set_header Host $GATEWAY_HOST;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;

proxy_connect_timeout 300s;
proxy_send_timeout 300s;
proxy_read_timeout 300s;

# Ensure TLS SNI to Cloudflare/origin matches the hostname
proxy_ssl_server_name on;
proxy_ssl_name $GATEWAY_HOST;
return 403 "Forbidden";
}
}
}
}
22 changes: 18 additions & 4 deletions evaluator/sandbox/sandbox_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
SANDBOX_NETWORK_NAME = f"{DOCKER_PREFIX}-sandbox-network"

SANDBOX_PROXY_HOST = f"{DOCKER_PREFIX}-sandbox-proxy"
SANDBOX_PROXY_PORT = 80
SANDBOX_PROXY_PORT = 8080



Expand Down Expand Up @@ -135,20 +135,34 @@ def initialize_sandbox(
elif script_extension == ".js":
command = f"node /sandbox/{script_name} 2>&1"

# Create Docker container
for root, dirs, files in os.walk(temp_dir):
os.chmod(root, 0o777)
for fname in files:
os.chmod(os.path.join(root, fname), 0o666)

container = get_docker_client().containers.run(
name=name,
image=f"{DOCKER_PREFIX}-sandbox-image",
volumes={temp_dir: {"bind": "/sandbox", "mode": "rw"}},
network=SANDBOX_NETWORK_NAME,
environment={
"PYTHONUNBUFFERED": "1",
"PYTHONDONTWRITEBYTECODE": "1", # No __pycache__
"PYTHONDONTWRITEBYTECODE": "1",
"HOME": "/tmp",
"SANDBOX_PROXY_URL": f"http://{SANDBOX_PROXY_HOST}:{SANDBOX_PROXY_PORT}",
"GIT_CONFIG_COUNT": "1",
"GIT_CONFIG_KEY_0": "safe.directory",
"GIT_CONFIG_VALUE_0": "/sandbox/repo",
**env_vars
},
command=command,
detach=True
detach=True,
user="65534",
read_only=True,
tmpfs={"/tmp": "size=64m,mode=1777"},
pids_limit=256,
security_opt=["no-new-privileges"],
cap_drop=["ALL"],
)

return Sandbox(
Expand Down
2 changes: 2 additions & 0 deletions operator/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.git
*.md
12 changes: 12 additions & 0 deletions operator/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Stage "modules": download Go module dependencies. Only go.mod / go.sum are
# copied here, so this stage does not depend on the rest of the source tree.
FROM golang:1.25 AS modules
WORKDIR /app
# NOTE(review): the go.sum* glob presumably lets the COPY succeed when go.sum
# is absent — confirm.
COPY go.mod go.sum* ./
RUN go mod download

# Stage "builder": copy the full build context on top of the downloaded
# modules and compile the manager binary with cgo disabled.
FROM modules AS builder
COPY . .
RUN CGO_ENABLED=0 go build -o manager ./cmd/main.go

# Final image: only the compiled binary, on the distroless static nonroot base.
FROM gcr.io/distroless/static:nonroot
COPY --from=builder /app/manager /manager
ENTRYPOINT ["/manager"]
94 changes: 94 additions & 0 deletions operator/api/v1alpha1/evaluatorpool_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package v1alpha1

import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Condition type names declared for EvaluatorPool.
// NOTE(review): presumably used as the Type of the entries in
// EvaluatorPoolStatus.Conditions — confirm against the controller.
const (
	ConditionReady          = "Ready"
	ConditionAPIReachable   = "APIReachable"
	ConditionSecretReady    = "SecretReady"
	ConditionNodesAvailable = "NodesAvailable"
	ConditionScalingActive  = "ScalingActive"
	ConditionDegraded       = "Degraded"
)

// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:subresource:scale:specpath=.spec.scaling.minReplicas,statuspath=.status.currentReplicas
// +kubebuilder:resource:shortName=ep,categories={ridges}
// +kubebuilder:printcolumn:name="Stage",type=string,JSONPath=`.spec.stage`
// +kubebuilder:printcolumn:name="Desired",type=integer,JSONPath=`.status.desiredReplicas`
// +kubebuilder:printcolumn:name="Current",type=integer,JSONPath=`.status.currentReplicas`
// +kubebuilder:printcolumn:name="Queue",type=integer,JSONPath=`.status.lastQueueDepth`
// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].status`
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`

// EvaluatorPool is the root API object of this package. It pairs an
// EvaluatorPoolSpec with an EvaluatorPoolStatus. The markers above enable the
// status and scale subresources (scale reads its replica count from
// .spec.scaling.minReplicas and reports .status.currentReplicas), register
// the short name "ep" and define the extra printer columns.
type EvaluatorPool struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	Spec   EvaluatorPoolSpec   `json:"spec,omitempty"`
	Status EvaluatorPoolStatus `json:"status,omitempty"`
}

// EvaluatorPoolSpec is the spec section of an EvaluatorPool.
type EvaluatorPoolSpec struct {
	// Stage names the stage this pool belongs to; validation restricts it to
	// screener_1 or screener_2.
	// +kubebuilder:validation:Enum=screener_1;screener_2
	Stage string `json:"stage"`

	// Scaling holds the replica bounds and the scale-down stabilization setting.
	Scaling EvaluatorPoolScaling `json:"scaling"`

	// PollingIntervalSeconds must be at least 1 and defaults to 30.
	// (Presumably the interval between queue polls; confirm against the controller.)
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:default=30
	PollingIntervalSeconds int32 `json:"pollingIntervalSeconds,omitempty"`

	// Resources optionally carries per-container resource requirements.
	// +optional
	Resources *EvaluatorPoolResources `json:"resources,omitempty"`
}

// EvaluatorPoolResources groups optional resource requirements.
// (Presumably one entry per container of a pool pod; confirm against the controller.)
type EvaluatorPoolResources struct {
	// Screener is the optional resource requirements entry named "screener".
	// +optional
	Screener *corev1.ResourceRequirements `json:"screener,omitempty"`
	// Dind is the optional resource requirements entry named "dind".
	// +optional
	Dind *corev1.ResourceRequirements `json:"dind,omitempty"`
}

// EvaluatorPoolScaling holds the scaling settings of an EvaluatorPool.
type EvaluatorPoolScaling struct {
	// MinReplicas must be at least 0 and defaults to 0. It is also the field
	// the scale subresource uses as its spec replica path.
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:default=0
	MinReplicas int32 `json:"minReplicas,omitempty"`

	// MaxReplicas must be at least 1; it has no default.
	// +kubebuilder:validation:Minimum=1
	MaxReplicas int32 `json:"maxReplicas"`

	// ScaleDownStabilizationSeconds must be at least 0 and defaults to 600.
	// +kubebuilder:validation:Minimum=0
	// +kubebuilder:default=600
	ScaleDownStabilizationSeconds int32 `json:"scaleDownStabilizationSeconds,omitempty"`
}

// EvaluatorPoolStatus is the status section of an EvaluatorPool.
// Field meanings below are taken from the field names; confirm against the
// controller that writes them.
type EvaluatorPoolStatus struct {
	// ObservedGeneration is presumably the metadata.generation last processed.
	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
	// CurrentReplicas is reported through the scale subresource status path
	// and shown in the "Current" printer column.
	CurrentReplicas int32 `json:"currentReplicas"`
	// DesiredReplicas is shown in the "Desired" printer column.
	DesiredReplicas int32 `json:"desiredReplicas"`
	// LastQueueDepth is shown in the "Queue" printer column.
	LastQueueDepth int32 `json:"lastQueueDepth"`
	// Optional timestamps; omitted from the serialized status when nil.
	LastScaleUpTime   *metav1.Time `json:"lastScaleUpTime,omitempty"`
	LastScaleDownTime *metav1.Time `json:"lastScaleDownTime,omitempty"`
	LastPollTime      *metav1.Time `json:"lastPollTime,omitempty"`

	// Conditions is a list keyed by condition type; the "Ready" printer
	// column reads the entry whose type is "Ready".
	// +listType=map
	// +listMapKey=type
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}

// +kubebuilder:object:root=true

// EvaluatorPoolList is the list type for EvaluatorPool objects.
type EvaluatorPoolList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []EvaluatorPool `json:"items"`
}

// init registers EvaluatorPool and EvaluatorPoolList with SchemeBuilder,
// which is declared elsewhere in this package.
func init() {
	SchemeBuilder.Register(&EvaluatorPool{}, &EvaluatorPoolList{})
}
Loading
Loading