From 75c5a0e0cd6cbf9ba27e8d5717647b786e4ac6d2 Mon Sep 17 00:00:00 2001
From: Bobby Christopher
Date: Thu, 11 Sep 2025 01:56:25 -0400
Subject: [PATCH 1/3] added patch

---
 internal/bundler/vercel_ai.go | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/internal/bundler/vercel_ai.go b/internal/bundler/vercel_ai.go
index 3fbea7c1..6c174e30 100644
--- a/internal/bundler/vercel_ai.go
+++ b/internal/bundler/vercel_ai.go
@@ -31,7 +31,15 @@ func createVercelAIProviderPatch(module string, createFn string, envkey string,
 }
 
 func init() {
-	var vercelTelemetryPatch = generateJSArgsPatch(0, `experimental_telemetry: { isEnabled: true }`)
+	// Enable telemetry on every call and tag it with the prompt id when the caller passes a
+	// prompt object. prompt is optional (callers may pass messages), so each access is guarded.
+	var vercelTelemetryPatch = generateJSArgsPatch(0, ``) + `
+		const opts = {...(_args[0] ?? {}) };
+		const metadata = opts.prompt?.id != null ? { promptId: opts.prompt.id } : {};
+		opts.experimental_telemetry = { isEnabled: true, metadata: metadata };
+		if (opts.prompt != null) opts.prompt = opts.prompt.toString();
+		_args[0] = opts;
+	`
 	vercelAIPatches := patchModule{
 		Module: "ai",
 		Functions: map[string]patchAction{

From 07cf2b834b4c33db153268cda22464a331095e1d Mon Sep 17 00:00:00 2001
From: Bobby Christopher
Date: Thu, 11 Sep 2025 09:54:47 -0400
Subject: [PATCH 2/3] added evals function

---
 cmd/eval.go | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 189 insertions(+)
 create mode 100644 cmd/eval.go

diff --git a/cmd/eval.go b/cmd/eval.go
new file mode 100644
index 00000000..8a380b90
--- /dev/null
+++ b/cmd/eval.go
@@ -0,0 +1,189 @@
+package cmd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"syscall"
+
+	"github.com/agentuity/cli/internal/errsystem"
+	"github.com/agentuity/cli/internal/project"
+	"github.com/agentuity/cli/internal/util"
+	"github.com/agentuity/go-common/env"
+	"github.com/agentuity/go-common/logger"
+	"github.com/agentuity/go-common/sys"
+	"github.com/agentuity/go-common/tui"
+	"github.com/charmbracelet/huh/spinner"
+	"github.com/spf13/cobra"
+)
+
+type EvalResponse = project.Response[string]
+
+func CreateEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, evalData string) error {
+	client := util.NewAPIClient(ctx, logger, baseUrl, token)
+
+	var resp EvalResponse
+	payload := map[string]any{
+		"data": evalData,
+	}
+	if err := client.Do("POST", fmt.Sprintf("/cli/project/%s/evaluations", projectId), payload, &resp); err != nil {
+		return fmt.Errorf("error creating evaluation: %w", err)
+	}
+	if !resp.Success {
+		return fmt.Errorf("failed to create evaluation: %s", resp.Message)
+	}
+	return nil
+}
+
+func PullEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, evalId string) (string, error) {
+	client := util.NewAPIClient(ctx, logger, baseUrl, token)
+
+	var resp EvalResponse
+	if err := client.Do("GET", fmt.Sprintf("/cli/project/%s/evaluations/%s", projectId, evalId), nil, &resp); err != nil {
+		return "", fmt.Errorf("error pulling evaluation: %w", err)
+	}
+	if !resp.Success {
+		return "", fmt.Errorf("failed to pull evaluation: %s", resp.Message)
+	}
+	return resp.Data, nil
+}
+
+var evalCmd = &cobra.Command{
+	Use:   "eval",
+	Short: "Evaluation related commands",
+	Long: `Evaluation related commands for managing evaluations and test data.
+
+Use the subcommands to create and pull evaluation data to/from the cloud.`,
+	Run: func(cmd *cobra.Command, args []string) {
+		cmd.Help()
+	},
+}
+
+var evalCreateCmd = &cobra.Command{
+	Use:   "create [file]",
+	Short: "Create evaluation data in the cloud",
+	Long: `Create evaluation data in the cloud for your project.
+
+Arguments:
+  [file]    Optional path to evaluation file (defaults to evals.json)
+
+Flags:
+  --force    Don't prompt for confirmation
+
+Examples:
+  agentuity eval create
+  agentuity eval create evals.json
+  agentuity eval create --force my-evals.json`,
+	Run: func(cmd *cobra.Command, args []string) {
+		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
+		defer cancel()
+
+		logger := env.NewLogger(cmd)
+		context := project.EnsureProject(ctx, cmd)
+		dir := context.Dir
+		apiUrl := context.APIURL
+		apiKey := context.Token
+		theproject := context.Project
+
+		force, _ := cmd.Flags().GetBool("force")
+
+		// Determine eval file path
+		var evalFile string
+		if len(args) > 0 {
+			evalFile = args[0]
+		} else {
+			evalFile = filepath.Join(dir, "evals.json")
+		}
+
+		// Check if file exists
+		if !sys.Exists(evalFile) {
+			errsystem.New(errsystem.ErrInvalidCommandFlag, fmt.Errorf("evaluation file not found: %s", evalFile)).ShowErrorAndExit()
+		}
+
+		// Read evaluation data
+		evalData, err := os.ReadFile(evalFile)
+		if err != nil {
+			errsystem.New(errsystem.ErrInvalidCommandFlag, err, errsystem.WithUserMessage("Failed to read evaluation file")).ShowErrorAndExit()
+		}
+
+		// Validate JSON
+		var evals interface{}
+		if err := json.Unmarshal(evalData, &evals); err != nil {
+			errsystem.New(errsystem.ErrInvalidCommandFlag, err, errsystem.WithUserMessage("Invalid JSON in evaluation file")).ShowErrorAndExit()
+		}
+
+		// Confirm create unless force flag is set
+		if !force {
+			if !tui.Ask(logger, fmt.Sprintf("Create evaluation from %s in the cloud?", evalFile), false) {
+				tui.ShowWarning("cancelled")
+				return
+			}
+		}
+
+		action := func() {
+			err := CreateEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, string(evalData))
+			if err != nil {
+				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to create evaluation")).ShowErrorAndExit()
+			}
+		}
+
+		spinner.New().Title("Creating evaluation...").Action(action).Run()
+		tui.ShowSuccess("Evaluation created successfully")
+	},
+}
+
+var evalPullCmd = &cobra.Command{
+	Use:   "pull <id>",
+	Short: "Pull evaluation data from the cloud by ID",
+	Long: `Pull evaluation data from the cloud for your project using the evaluation ID.
+
+Arguments:
+  <id>    The evaluation ID to pull
+
+Examples:
+  agentuity eval pull abc123
+  agentuity eval pull def456`,
+	Args: cobra.ExactArgs(1),
+	Run: func(cmd *cobra.Command, args []string) {
+		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
+		defer cancel()
+
+		logger := env.NewLogger(cmd)
+		context := project.EnsureProject(ctx, cmd)
+		apiUrl := context.APIURL
+		apiKey := context.Token
+		theproject := context.Project
+
+		evalId := args[0]
+
+		var evalData string
+		action := func() {
+			var err error
+			evalData, err = PullEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, evalId)
+			if err != nil {
+				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to pull evaluation")).ShowErrorAndExit()
+			}
+		}
+
+		spinner.New().Title("Pulling evaluation...").Action(action).Run()
+
+		// Output to stdout
+		fmt.Println(evalData)
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(evalCmd)
+
+	evalCreateCmd.Flags().Bool("force", !hasTTY, "Don't prompt for confirmation")
+
+	evalCmd.AddCommand(evalCreateCmd)
+	evalCmd.AddCommand(evalPullCmd)
+
+	for _, cmd := range []*cobra.Command{evalCreateCmd, evalPullCmd} {
+		cmd.Flags().StringP("dir", "d", ".", "The directory to the project")
+	}
+}

From 50bfdf914b61ab89a8d28a7d8a71c383cfa41af8 Mon Sep 17 00:00:00 2001
From: Bobby Christopher
Date: Mon, 15 Sep 2025 10:39:10 -0400
Subject: [PATCH 3/3] added eval cmd

---
 cmd/eval.go | 217 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 172 insertions(+), 45 deletions(-)

diff --git a/cmd/eval.go b/cmd/eval.go
index 8a380b90..800dd0e7 100644
--- a/cmd/eval.go
+++ b/cmd/eval.go
@@ -2,7 +2,6 @@ package cmd
 
 import (
 	"context"
-	"encoding/json"
 	"fmt"
 	"os"
 	"os/signal"
@@ -14,41 +13,89 @@ import (
 	"github.com/agentuity/cli/internal/util"
 	"github.com/agentuity/go-common/env"
 	"github.com/agentuity/go-common/logger"
-	"github.com/agentuity/go-common/sys"
 	"github.com/agentuity/go-common/tui"
 	"github.com/charmbracelet/huh/spinner"
 	"github.com/spf13/cobra"
 )
 
-type EvalResponse = project.Response[string]
+// EvalObject is the evaluation record returned by the create endpoint.
+type EvalObject struct {
+	ID          string `json:"id"`
+	Name        string `json:"name"`
+	Description string `json:"description"`
+	ProjectID   string `json:"projectId"`
+	OrgID       string `json:"orgId"`
+}
+
+// EvalPullObject is the evaluation record returned by the pull endpoint. Code holds
+// the evaluation source that the eval commands write to a .ts file.
+type EvalPullObject struct {
+	Code        string `json:"code"`
+	ID          string `json:"id"`
+	Name        string `json:"name"`
+	Description string `json:"description"`
+}
+
+// EvalCreateResponse and EvalPullResponse are the API response envelopes for the
+// create and pull endpoints.
+type EvalCreateResponse = project.Response[EvalObject]
+type EvalPullResponse = project.Response[EvalPullObject]
+
+// CreateGenerativeEvaluation creates a "generative" evaluation for projectId via
+// POST /cli/eval and returns the ID of the new evaluation.
+func CreateGenerativeEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string) (string, error) {
+	client := util.NewAPIClient(ctx, logger, baseUrl, token)
+
+	var resp EvalCreateResponse
+	payload := map[string]any{
+		"projectId": projectId,
+		"type":      "generative",
+	}
+
+	if err := client.Do("POST", "/cli/eval", payload, &resp); err != nil {
+		return "", fmt.Errorf("error creating generative evaluation: %w", err)
+	}
+	if !resp.Success {
+		return "", fmt.Errorf("failed to create generative evaluation: %s", resp.Message)
+	}
+	return resp.Data.ID, nil
+}
 
-func CreateEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, evalData string) error {
+// CreateTemplateEvaluation creates a "template" evaluation with the given name and
+// description for projectId via POST /cli/eval and returns the ID of the new evaluation.
+func CreateTemplateEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, name string, description string) (string, error) {
 	client := util.NewAPIClient(ctx, logger, baseUrl, token)
 
-	var resp EvalResponse
+	var resp EvalCreateResponse
 	payload := map[string]any{
-		"data": evalData,
+		"projectId":   projectId,
+		"name":        name,
+		"description": description,
+		"type":        "template",
 	}
-	if err := client.Do("POST", fmt.Sprintf("/cli/project/%s/evaluations", projectId), payload, &resp); err != nil {
-		return fmt.Errorf("error creating evaluation: %w", err)
+
+	if err := client.Do("POST", "/cli/eval", payload, &resp); err != nil {
+		return "", fmt.Errorf("error creating template evaluation: %w", err)
 	}
 	if !resp.Success {
-		return fmt.Errorf("failed to create evaluation: %s", resp.Message)
+		return "", fmt.Errorf("failed to create template evaluation: %s", resp.Message)
 	}
-	return nil
+	return resp.Data.ID, nil
 }
 
-func PullEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, evalId string) (string, error) {
+// PullEvaluation fetches the evaluation identified by evalId via GET /cli/eval/pull/{id}.
+// It returns an error if the request fails or the API reports failure.
+func PullEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, evalId string) (*EvalPullObject, error) {
 	client := util.NewAPIClient(ctx, logger, baseUrl, token)
 
-	var resp EvalResponse
-	if err := client.Do("GET", fmt.Sprintf("/cli/project/%s/evaluations/%s", projectId, evalId), nil, &resp); err != nil {
-		return "", fmt.Errorf("error pulling evaluation: %w", err)
+	var resp EvalPullResponse
+	if err := client.Do("GET", fmt.Sprintf("/cli/eval/pull/%s", evalId), nil, &resp); err != nil {
+		return nil, fmt.Errorf("error pulling evaluation: %w", err)
 	}
 	if !resp.Success {
-		return "", fmt.Errorf("failed to pull evaluation: %s", resp.Message)
+		return nil, fmt.Errorf("failed to pull evaluation: %s", resp.Message)
 	}
-	return resp.Data, nil
+	return &resp.Data, nil
 }
 
 var evalCmd = &cobra.Command{
@@ -63,20 +110,21 @@ Use the subcommands to create and pull evaluation data to/from the cloud.`,
 }
 
 var evalCreateCmd = &cobra.Command{
-	Use:   "create [file]",
+	Use:   "create [name] [description]",
 	Short: "Create evaluation data in the cloud",
 	Long: `Create evaluation data in the cloud for your project.
 
 Arguments:
-  [file]    Optional path to evaluation file (defaults to evals.json)
+  [name]           Optional name for the evaluation
+  [description]    Optional description for the evaluation
 
 Flags:
   --force    Don't prompt for confirmation
 
 Examples:
   agentuity eval create
-  agentuity eval create evals.json
-  agentuity eval create --force my-evals.json`,
+  agentuity eval create "My Eval" "Description of evaluation"
+  agentuity eval create --force "My Eval" "Description"`,
 	Run: func(cmd *cobra.Command, args []string) {
 		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
 		defer cancel()
@@ -90,48 +138,111 @@ Examples:
 
 		force, _ := cmd.Flags().GetBool("force")
 
-		// Determine eval file path
-		var evalFile string
-		if len(args) > 0 {
-			evalFile = args[0]
+		// First, get the evaluation type
+		var evalType string
+		if !tui.HasTTY {
+			// Default to template when no TTY
+			evalType = "template"
 		} else {
-			evalFile = filepath.Join(dir, "evals.json")
+			evalType = tui.Select(logger, "What type of evaluation would you like to create?", "Choose between template-based or generative evaluation", []tui.Option{
+				{Text: tui.PadRight("Template", 20, " ") + tui.Muted("Use a predefined regex evaluation template"), ID: "template"},
+				{Text: tui.PadRight("Generative", 20, " ") + tui.Muted("AI will generate custom evaluation code"), ID: "generative"},
+			})
 		}
 
-		// Check if file exists
-		if !sys.Exists(evalFile) {
-			errsystem.New(errsystem.ErrInvalidCommandFlag, fmt.Errorf("evaluation file not found: %s", evalFile)).ShowErrorAndExit()
-		}
+		var name, description string
 
-		// Read evaluation data
-		evalData, err := os.ReadFile(evalFile)
-		if err != nil {
-			errsystem.New(errsystem.ErrInvalidCommandFlag, err, errsystem.WithUserMessage("Failed to read evaluation file")).ShowErrorAndExit()
-		}
+		// Get name and description only for template type
+		if evalType == "template" {
+			// Get name and description from args or prompt
+			if len(args) > 0 {
+				name = args[0]
+			}
+			if len(args) > 1 {
+				description = args[1]
+			}
+
+			// Interactive flow for name and description
+			if name == "" {
+				if !tui.HasTTY {
+					logger.Fatal("No TTY detected, please specify an evaluation name from the command line")
+				}
+				name = tui.InputWithValidation(logger, "What should we name this evaluation?", "The name helps identify the evaluation", 255, func(name string) error {
+					if name == "" {
+						return fmt.Errorf("evaluation name cannot be empty")
+					}
+					return nil
+				})
+			}
 
-		// Validate JSON
-		var evals interface{}
-		if err := json.Unmarshal(evalData, &evals); err != nil {
-			errsystem.New(errsystem.ErrInvalidCommandFlag, err, errsystem.WithUserMessage("Invalid JSON in evaluation file")).ShowErrorAndExit()
+			if description == "" && tui.HasTTY { // description is optional; never prompt without a TTY
+				description = tui.Input(logger, "How should we describe what this evaluation tests?", "The description is optional but helpful for understanding the purpose of the evaluation")
+			}
 		}
 
 		// Confirm create unless force flag is set
 		if !force {
-			if !tui.Ask(logger, fmt.Sprintf("Create evaluation from %s in the cloud?", evalFile), false) {
+			var confirmMessage string
+			if evalType == "template" {
+				confirmMessage = fmt.Sprintf("Create template evaluation '%s' in the cloud?", name)
+			} else {
+				confirmMessage = "Create generative evaluation in the cloud?"
+			}
+
+			if !tui.Ask(logger, confirmMessage, false) {
 				tui.ShowWarning("cancelled")
 				return
 			}
 		}
 
+		var evalId string
+		var evalObj *EvalPullObject
 		action := func() {
-			err := CreateEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, string(evalData))
+			var err error
+
+			// Call the appropriate function based on type
+			if evalType == "template" {
+				evalId, err = CreateTemplateEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, name, description)
+			} else {
+				evalId, err = CreateGenerativeEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId)
+			}
+
 			if err != nil {
 				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to create evaluation")).ShowErrorAndExit()
 			}
+
+			// Automatically pull the evaluation data
+			evalObj, err = PullEvaluation(ctx, logger, apiUrl, apiKey, evalId)
+			if err != nil {
+				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to pull evaluation data")).ShowErrorAndExit()
+			}
 		}
 
 		spinner.New().Title("Creating evaluation...").Action(action).Run()
-		tui.ShowSuccess("Evaluation created successfully")
+
+		// Write code to file; Base() keeps a server-supplied name inside the evals directory
+		filename := filepath.Base(evalObj.Name) + ".ts"
+		evalsDir := filepath.Join(dir, "src", "evals")
+
+		// Create the evals directory if it doesn't exist
+		if err := os.MkdirAll(evalsDir, 0755); err != nil {
+			errsystem.New(errsystem.ErrCreateDirectory, err, errsystem.WithUserMessage("Failed to create evals directory")).ShowErrorAndExit()
+		}
+
+		filePath := filepath.Join(evalsDir, filename)
+		if err := os.WriteFile(filePath, []byte(evalObj.Code), 0644); err != nil {
+			errsystem.New(errsystem.ErrOpenFile, err, errsystem.WithUserMessage("Failed to write evaluation code to file")).ShowErrorAndExit()
+		}
+
+		if evalType == "template" {
+			tui.ShowSuccess("Template evaluation '%s' created successfully with ID: %s", name, evalId)
+		} else {
+			tui.ShowSuccess("Generative evaluation created successfully with ID: %s", evalId)
+		}
+
+		tui.ShowSuccess("Evaluation code written to: %s", filePath)
+		fmt.Println("\nEvaluation code:")
+		fmt.Println(evalObj.Code)
 	},
 }
 
@@ -153,16 +264,16 @@ Examples:
 
 		logger := env.NewLogger(cmd)
 		context := project.EnsureProject(ctx, cmd)
+		dir := context.Dir
 		apiUrl := context.APIURL
 		apiKey := context.Token
-		theproject := context.Project
 
 		evalId := args[0]
 
-		var evalData string
+		var evalObj *EvalPullObject
 		action := func() {
 			var err error
-			evalData, err = PullEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, evalId)
+			evalObj, err = PullEvaluation(ctx, logger, apiUrl, apiKey, evalId)
 			if err != nil {
 				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to pull evaluation")).ShowErrorAndExit()
 			}
@@ -170,8 +281,24 @@ Examples:
 
 		spinner.New().Title("Pulling evaluation...").Action(action).Run()
 
+		// Write code to file; Base() keeps a server-supplied name inside the evals directory
+		filename := filepath.Base(evalObj.Name) + ".ts"
+		evalsDir := filepath.Join(dir, "src", "evals")
+
+		// Create the evals directory if it doesn't exist
+		if err := os.MkdirAll(evalsDir, 0755); err != nil {
+			errsystem.New(errsystem.ErrCreateDirectory, err, errsystem.WithUserMessage("Failed to create evals directory")).ShowErrorAndExit()
+		}
+
+		filePath := filepath.Join(evalsDir, filename)
+		if err := os.WriteFile(filePath, []byte(evalObj.Code), 0644); err != nil {
+			errsystem.New(errsystem.ErrOpenFile, err, errsystem.WithUserMessage("Failed to write evaluation code to file")).ShowErrorAndExit()
+		}
+
+		tui.ShowSuccess("Evaluation code written to: %s", filePath)
+
 		// Output to stdout
-		fmt.Println(evalData)
+		fmt.Println(evalObj.Code)
 	},
 }
 