This repository was archived by the owner on Jan 23, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
Eval handler #472
Merged
Merged
Eval handler #472
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
932a51a
added eval handler code
potofpie 1377691
Merge branch 'main' of https://github.com/agentuity/cli into eval-han…
potofpie 9e72dd4
added eval run
potofpie d08a83c
add all changes for review
potofpie a87ac2d
fixes
potofpie 81c8c45
changing gears a bit
potofpie 05fb438
evals are running again
potofpie 729d22d
added rest of the important shit
potofpie 4cc12f9
Merge branch 'main' of https://github.com/agentuity/cli into eval-han…
potofpie dd9781c
added first pass review
potofpie b768b40
remove gravuity go change
potofpie da6463e
remove test build
potofpie 9498358
remove dev change
potofpie 1d8e8d0
made it work
potofpie 5f9a379
made it work
potofpie b1ff11f
fix build error
potofpie File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,186 @@ | ||
| package cmd | ||
|
|
||
| import ( | ||
| "context" | ||
| "encoding/json" | ||
| "fmt" | ||
| "os" | ||
| "os/signal" | ||
| "path/filepath" | ||
| "strings" | ||
| "syscall" | ||
|
|
||
| "github.com/agentuity/cli/internal/errsystem" | ||
| "github.com/agentuity/cli/internal/eval" | ||
| "github.com/agentuity/cli/internal/project" | ||
| "github.com/agentuity/cli/internal/util" | ||
| "github.com/agentuity/go-common/env" | ||
| "github.com/agentuity/go-common/logger" | ||
| "github.com/agentuity/go-common/tui" | ||
| "github.com/spf13/cobra" | ||
| ) | ||
|
|
||
| var evalCmd = &cobra.Command{ | ||
| Use: "eval", | ||
| Short: "Evaluation related commands", | ||
| Run: func(cmd *cobra.Command, args []string) { | ||
| cmd.Help() | ||
| }, | ||
| } | ||
|
|
||
| func getEvalInfoFlow(logger logger.Logger, name string, description string) (string, string) { | ||
| if name == "" { | ||
| if !tui.HasTTY { | ||
| logger.Fatal("No TTY detected, please specify an eval name from the command line") | ||
| } | ||
| name = tui.InputWithValidation(logger, "What should we name the evaluation?", "The name of the eval helps identify its purpose", 255, func(name string) error { | ||
| if name == "" { | ||
| return fmt.Errorf("Eval name cannot be empty") | ||
| } | ||
| return nil | ||
| }) | ||
| } | ||
|
|
||
| if description == "" { | ||
| description = tui.Input(logger, "How should we describe what the "+name+" eval does?", "The description of the eval is optional but helpful for understanding its purpose") | ||
| } | ||
|
|
||
| return name, description | ||
| } | ||
|
|
||
| func generateEvalFile(logger logger.Logger, projectDir string, evalID string, slug string, name string, description string) error { | ||
| // Always generate TypeScript files for evals | ||
| ext := ".ts" | ||
|
|
||
| // Create evals directory if it doesn't exist | ||
| evalsDir := filepath.Join(projectDir, "src", "evals") | ||
| if err := os.MkdirAll(evalsDir, 0755); err != nil { | ||
| return fmt.Errorf("failed to create evals directory: %w", err) | ||
| } | ||
|
|
||
| // Generate file path | ||
| filename := filepath.Join(evalsDir, slug+ext) | ||
|
|
||
| // Check if file already exists | ||
| if util.Exists(filename) { | ||
| return fmt.Errorf("eval file already exists: %s", filename) | ||
| } | ||
|
|
||
| // Generate TypeScript content with metadata | ||
| content := fmt.Sprintf(`import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk'; | ||
|
|
||
| export const metadata = { | ||
| id: '%s', | ||
| slug: '%s', | ||
| name: '%s', | ||
| description: '%s' | ||
| }; | ||
|
|
||
| /** | ||
| * %s | ||
| * %s | ||
| */ | ||
| export default async function evaluate( | ||
| _ctx: EvalContext, | ||
| req: EvalRequest, | ||
| res: EvalResponse | ||
| ) { | ||
| const { input, output } = req; | ||
|
|
||
| // TODO: Implement your evaluation logic here | ||
| // Example: Score the output based on some criteria | ||
|
|
||
| const score = 0.8; // Replace with your actual scoring logic | ||
| const metadata = { | ||
| reasoning: 'Replace with your evaluation reasoning' | ||
| }; | ||
|
|
||
| res.score(score, metadata); | ||
| } | ||
| `, evalID, slug, name, description, name, description) | ||
|
|
||
| // Write file | ||
| if err := os.WriteFile(filename, []byte(content), 0644); err != nil { | ||
| return fmt.Errorf("failed to write eval file: %w", err) | ||
| } | ||
|
|
||
| logger.Debug("Created eval file: %s", filename) | ||
| return nil | ||
| } | ||
|
|
||
| var evalCreateCmd = &cobra.Command{ | ||
| Use: "create [name] [description]", | ||
| Short: "Create a new evaluation function", | ||
| Aliases: []string{"new"}, | ||
| Args: cobra.MaximumNArgs(2), | ||
| Run: func(cmd *cobra.Command, args []string) { | ||
| ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM) | ||
| defer cancel() | ||
| logger := env.NewLogger(cmd) | ||
| theproject := project.EnsureProject(ctx, cmd) | ||
| apikey := theproject.Token | ||
| urls := util.GetURLs(logger) | ||
| apiUrl := urls.API | ||
|
|
||
| var name string | ||
| var description string | ||
|
|
||
| if len(args) > 0 { | ||
| name = args[0] | ||
| } | ||
|
|
||
| if len(args) > 1 { | ||
| description = args[1] | ||
| } | ||
|
|
||
| name, description = getEvalInfoFlow(logger, name, description) | ||
|
|
||
| // Generate slug from name | ||
| isPython := theproject.Project.Bundler.Language == "python" | ||
| slug := util.SafeProjectFilename(strings.ToLower(name), isPython) | ||
|
|
||
| var evalID string | ||
| var evalErr error | ||
|
|
||
| action := func() { | ||
| // Create eval via API | ||
| evalID, evalErr = eval.CreateEval(ctx, logger, apiUrl, apikey, theproject.Project.ProjectId, slug, name, description) | ||
| if evalErr != nil { | ||
| errsystem.New(errsystem.ErrApiRequest, evalErr, errsystem.WithContextMessage("Failed to create eval")).ShowErrorAndExit() | ||
| } | ||
|
|
||
| logger.Debug("Created eval with ID: %s", evalID) | ||
|
|
||
| // Generate eval file (always TypeScript) with the real ID from API | ||
| if err := generateEvalFile(logger, theproject.Dir, evalID, slug, name, description); err != nil { | ||
| errsystem.New(errsystem.ErrOpenFile, err, errsystem.WithContextMessage("Failed to create eval file")).ShowErrorAndExit() | ||
| } | ||
| } | ||
|
|
||
| tui.ShowSpinner("Creating evaluation ...", action) | ||
|
|
||
| format, _ := cmd.Flags().GetString("format") | ||
| if format == "json" { | ||
| result := map[string]string{ | ||
| "id": evalID, | ||
| "slug": slug, | ||
| "name": name, | ||
| "description": description, | ||
| } | ||
| json.NewEncoder(os.Stdout).Encode(result) | ||
| } else { | ||
| tui.ShowSuccess("Evaluation created successfully") | ||
| fmt.Printf("\nFile created: %s\n", tui.Muted(fmt.Sprintf("src/evals/%s.ts", slug))) | ||
| } | ||
| }, | ||
| } | ||
|
|
||
| func init() { | ||
| rootCmd.AddCommand(evalCmd) | ||
| evalCmd.AddCommand(evalCreateCmd) | ||
|
|
||
| for _, cmd := range []*cobra.Command{evalCreateCmd} { | ||
| cmd.Flags().StringP("dir", "d", "", "The project directory") | ||
| cmd.Flags().String("format", "text", "The format to use for the output. Can be either 'text' or 'json'") | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.