Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -334,11 +334,13 @@ skill-validator score evaluate --provider claude-cli <path>
| Provider | Env var | Default model | Covers |
|---|---|---|---|
| `anthropic` (default) | `ANTHROPIC_API_KEY` | `claude-sonnet-4-5-20250929` | Anthropic |
| `openai` | `OPENAI_API_KEY` | `gpt-5.2` | OpenAI, Ollama, Together, Groq, Azure, etc. |
| `openai` | `OPENAI_API_KEY` | `gpt-5.2` | OpenAI, Ollama, Together, Groq, Azure, etc. \*\* |
| `claude-cli` | _(none)_ | `sonnet` | Claude CLI (uses locally authenticated `claude` binary) \* |

\* **Accuracy note:** The `claude-cli` provider shells out to the `claude` CLI, which loads local context (CLAUDE.md files, project memory, rules) into each scoring call. This extra context may influence scores, making them less reproducible across environments compared to the API-based providers. For the most consistent results, use the `anthropic` or `openai` providers with an API key.

\*\* The `openai` provider also reads these optional environment variables: `OPENAI_BASE_URL` (API base URL, overridden by `--base-url`), `OPENAI_ORG_ID` (sent as `OpenAI-Organization` header), and `OPENAI_PROJECT_ID` (sent as `OpenAI-Project` header). The org/project headers are only sent when targeting an OpenAI endpoint (e.g. `api.openai.com`, `us.api.openai.com`), not when using third-party compatible APIs.

Use `--model` to override the default model and `--base-url` to point at any OpenAI-compatible endpoint (e.g. `http://localhost:11434/v1` for Ollama). If the endpoint requires a specific token limit parameter, use `--max-tokens-style` to override auto-detection:

| Value | Behavior |
Expand Down
24 changes: 21 additions & 3 deletions cmd/score_evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,13 @@ The path can be:
- A specific .md file — scores just that reference file

Requires an API key via environment variable:
ANTHROPIC_API_KEY (for --provider anthropic, the default)
OPENAI_API_KEY (for --provider openai)
ANTHROPIC_API_KEY (for --provider anthropic, the default)
OPENAI_API_KEY (for --provider openai)

Optional OpenAI environment variables:
OPENAI_BASE_URL (API base URL; overridden by --base-url flag)
OPENAI_ORG_ID (organization ID; sent as OpenAI-Organization header)
OPENAI_PROJECT_ID (project ID; sent as OpenAI-Project header)

The claude-cli provider uses the locally installed "claude" CLI and does not
require an API key. This is useful when the CLI is already authenticated
Expand Down Expand Up @@ -88,12 +93,25 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error {
}
}

// For the openai provider, read optional env vars for base URL, org, and project.
baseURL := evalBaseURL
var orgID, projectID string
if strings.ToLower(evalProvider) == "openai" {
if baseURL == "" {
baseURL = os.Getenv("OPENAI_BASE_URL")
}
orgID = os.Getenv("OPENAI_ORG_ID")
projectID = os.Getenv("OPENAI_PROJECT_ID")
}

client, err := judge.NewClient(judge.ClientOptions{
Provider: evalProvider,
APIKey: apiKey,
BaseURL: evalBaseURL,
BaseURL: baseURL,
Model: evalModel,
MaxTokensStyle: evalMaxTokensStyle,
OrgID: orgID,
ProjectID: projectID,
})
if err != nil {
return err
Expand Down
30 changes: 29 additions & 1 deletion judge/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net/http"
"net/url"
"os/exec"
"strings"
"time"
Expand Down Expand Up @@ -38,6 +39,8 @@ type ClientOptions struct {
Model string // Optional; defaults per provider
MaxTokensStyle string // "auto", "max_tokens", or "max_completion_tokens"
MaxResponseTokens int // Maximum tokens in the LLM response; 0 defaults to 500
OrgID string // Optional OpenAI organization ID; sent as OpenAI-Organization header
ProjectID string // Optional OpenAI project ID; sent as OpenAI-Project header
}

// NewClient creates an LLMClient for the given options.
Expand Down Expand Up @@ -84,7 +87,7 @@ func NewClient(opts ClientOptions) (LLMClient, error) {
baseURL = "https://api.openai.com/v1"
}
baseURL = strings.TrimRight(baseURL, "/")
return &openaiClient{apiKey: opts.APIKey, baseURL: baseURL, model: model, maxTokensStyle: opts.MaxTokensStyle, maxTokens: maxResp}, nil
return &openaiClient{apiKey: opts.APIKey, baseURL: baseURL, model: model, maxTokensStyle: opts.MaxTokensStyle, maxTokens: maxResp, orgID: opts.OrgID, projectID: opts.ProjectID}, nil
default:
return nil, fmt.Errorf("unsupported provider %q (use \"anthropic\", \"openai\", or \"claude-cli\")", opts.Provider)
}
Expand Down Expand Up @@ -186,11 +189,25 @@ type openaiClient struct {
model string
maxTokensStyle string
maxTokens int
orgID string
projectID string
}

// Provider identifies this client as the "openai" provider.
func (c *openaiClient) Provider() string {
	return "openai"
}

// ModelName returns the model this client was configured to use.
func (c *openaiClient) ModelName() string {
	return c.model
}

// isOpenAIHost reports whether the given base URL points to an OpenAI endpoint
// (api.openai.com, us.api.openai.com, etc.) as opposed to a third-party
// compatible API like Ollama or vLLM.
//
// Host names are case-insensitive (RFC 3986), so the check lowercases the host
// before comparing. An unparseable URL is treated as non-OpenAI so that the
// org/project headers are never sent to an unknown destination.
func isOpenAIHost(baseURL string) bool {
	u, err := url.Parse(baseURL)
	if err != nil {
		return false
	}
	host := strings.ToLower(u.Hostname())
	return strings.HasSuffix(host, ".openai.com")
}

type openaiRequest struct {
Model string `json:"model"`
MaxTokens int `json:"max_tokens,omitempty"`
Expand Down Expand Up @@ -266,6 +283,17 @@ func (c *openaiClient) Complete(ctx context.Context, systemPrompt, userContent s
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+c.apiKey)

// Only send OpenAI-specific org/project headers when targeting an
// OpenAI endpoint, so they don't leak to Ollama or other compatible APIs.
if isOpenAIHost(c.baseURL) {
if c.orgID != "" {
req.Header.Set("OpenAI-Organization", c.orgID)
}
if c.projectID != "" {
req.Header.Set("OpenAI-Project", c.projectID)
}
}

resp, err := defaultHTTPClient.Do(req)
if err != nil {
return "", fmt.Errorf("API request failed: %w", err)
Expand Down
142 changes: 142 additions & 0 deletions judge/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ import (
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
"time"
)

// stubLookPath replaces the lookPath variable for the duration of a test,
Expand Down Expand Up @@ -140,6 +142,146 @@ func TestUseMaxCompletionTokens(t *testing.T) {
}
}

// TestIsOpenAIHost checks that only *.openai.com hosts are classified as
// official OpenAI endpoints, and that non-OpenAI or malformed URLs are not.
func TestIsOpenAIHost(t *testing.T) {
	cases := map[string]bool{
		"https://api.openai.com/v1":       true,
		"https://us.api.openai.com/v1":    true,
		"https://eu.api.openai.com/v1":    true,
		"http://localhost:11434/v1":       false,
		"https://my-proxy.example.com/v1": false,
		"https://notopenai.com/v1":        false,
		"not a url":                       false,
	}

	for baseURL, want := range cases {
		t.Run(baseURL, func(t *testing.T) {
			if got := isOpenAIHost(baseURL); got != want {
				t.Errorf("isOpenAIHost(%q) = %v, want %v", baseURL, got, want)
			}
		})
	}
}

// TestOpenAIClient_OrgProjectHeaders verifies that the OpenAI-Organization and
// OpenAI-Project headers are sent only when the client's base URL is an
// openai.com host, and only when the corresponding IDs are non-empty.
func TestOpenAIClient_OrgProjectHeaders(t *testing.T) {
	t.Run("headers sent for openai.com", func(t *testing.T) {
		// The handler asserts that both headers arrive with the configured values.
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if got := r.Header.Get("OpenAI-Organization"); got != "org-123" {
				t.Errorf("OpenAI-Organization = %q, want %q", got, "org-123")
			}
			if got := r.Header.Get("OpenAI-Project"); got != "proj-456" {
				t.Errorf("OpenAI-Project = %q, want %q", got, "proj-456")
			}
			w.Header().Set("Content-Type", "application/json")
			_, _ = fmt.Fprint(w, `{"choices": [{"message": {"content": "ok"}}]}`)
		}))
		defer server.Close()

		// The header decision keys off the client's baseURL, which must be an
		// openai.com host — but the test server listens on localhost. So build
		// the openaiClient directly with an api.openai.com baseURL and, below,
		// install a transport that reroutes the actual network traffic to the
		// test server.
		c := &openaiClient{
			apiKey:    "test-key",
			baseURL:   "https://api.openai.com/v1",
			model:     "gpt-4o",
			maxTokens: 500,
			orgID:     "org-123",
			projectID: "proj-456",
		}

		// Swap out the package-level HTTP client for the duration of this
		// subtest and restore it afterwards.
		origClient := defaultHTTPClient
		defer func() { defaultHTTPClient = origClient }()

		// rewriteTransport sends every request to the test server regardless
		// of the URL the client built.
		testURL := server.URL
		defaultHTTPClient = &http.Client{
			Timeout:   5 * time.Second,
			Transport: &rewriteTransport{target: testURL},
		}

		_, err := c.Complete(t.Context(), "system", "user")
		if err != nil {
			t.Fatalf("Complete failed: %v", err)
		}
	})

	t.Run("headers not sent for custom base URL", func(t *testing.T) {
		// A non-openai.com baseURL (the test server itself) must not receive
		// the OpenAI-specific headers even though org/project IDs are set.
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if got := r.Header.Get("OpenAI-Organization"); got != "" {
				t.Errorf("OpenAI-Organization should be empty for non-OpenAI host, got %q", got)
			}
			if got := r.Header.Get("OpenAI-Project"); got != "" {
				t.Errorf("OpenAI-Project should be empty for non-OpenAI host, got %q", got)
			}
			w.Header().Set("Content-Type", "application/json")
			_, _ = fmt.Fprint(w, `{"choices": [{"message": {"content": "ok"}}]}`)
		}))
		defer server.Close()

		client, err := NewClient(ClientOptions{
			Provider:  "openai",
			APIKey:    "test-key",
			BaseURL:   server.URL,
			Model:     "llama3",
			OrgID:     "org-123",
			ProjectID: "proj-456",
		})
		if err != nil {
			t.Fatalf("NewClient: %v", err)
		}
		_, err = client.Complete(t.Context(), "system", "user")
		if err != nil {
			t.Fatalf("Complete failed: %v", err)
		}
	})

	t.Run("empty org and project not sent", func(t *testing.T) {
		// With no org/project IDs configured, neither header is sent.
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if got := r.Header.Get("OpenAI-Organization"); got != "" {
				t.Errorf("OpenAI-Organization should be empty, got %q", got)
			}
			if got := r.Header.Get("OpenAI-Project"); got != "" {
				t.Errorf("OpenAI-Project should be empty, got %q", got)
			}
			w.Header().Set("Content-Type", "application/json")
			_, _ = fmt.Fprint(w, `{"choices": [{"message": {"content": "ok"}}]}`)
		}))
		defer server.Close()

		client, err := NewClient(ClientOptions{
			Provider: "openai",
			APIKey:   "test-key",
			BaseURL:  server.URL,
			Model:    "gpt-4o",
		})
		if err != nil {
			t.Fatalf("NewClient: %v", err)
		}
		_, err = client.Complete(t.Context(), "system", "user")
		if err != nil {
			t.Fatalf("Complete failed: %v", err)
		}
	})
}

// rewriteTransport rewrites requests to a different target URL while
// preserving the original Host header for testing.
type rewriteTransport struct {
target string
}

func (t *rewriteTransport) RoundTrip(req *http.Request) (*http.Response, error) {
targetURL, _ := url.Parse(t.target)
req.URL.Scheme = targetURL.Scheme
req.URL.Host = targetURL.Host
return http.DefaultTransport.RoundTrip(req)
}

func TestMaxTokensStyleOverride(t *testing.T) {
tests := []struct {
name string
Expand Down
Loading