From 75c5a0e0cd6cbf9ba27e8d5717647b786e4ac6d2 Mon Sep 17 00:00:00 2001
From: Bobby Christopher <b.christopher.3rd@gmail.com>
Date: Thu, 11 Sep 2025 01:56:25 -0400
Subject: [PATCH 1/3] bundler: attach prompt ID metadata to Vercel AI telemetry patch

---
 internal/bundler/vercel_ai.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/internal/bundler/vercel_ai.go b/internal/bundler/vercel_ai.go
index 3fbea7c1..6c174e30 100644
--- a/internal/bundler/vercel_ai.go
+++ b/internal/bundler/vercel_ai.go
@@ -31,7 +31,28 @@ func createVercelAIProviderPatch(module string, createFn string, envkey string,
 }
 
 func init() {
-	var vercelTelemetryPatch = generateJSArgsPatch(0, `experimental_telemetry: { isEnabled: true }`)
+	// Force telemetry on for every patched call. When the caller passed a prompt
+	// object (something exposing id/toString()), record its id as telemetry
+	// metadata and pass the SDK the rendered string. Calls without a prompt (for
+	// example those using messages) and array prompts are left as-is instead of
+	// throwing or being stringified, and caller-supplied telemetry options are
+	// merged rather than replaced. The JS is wrapped in a block so its consts
+	// stay scoped to the snippet wherever it is injected.
+	var vercelTelemetryPatch = generateJSArgsPatch(0, ``) + `
+	{
+		const opts = { ...(_args[0] ?? {}) };
+		const telemetry = { ...(opts.experimental_telemetry ?? {}), isEnabled: true };
+		const prompt = opts.prompt;
+		if (prompt != null && typeof prompt === 'object' && !Array.isArray(prompt)) {
+			if (prompt.id != null) {
+				telemetry.metadata = { ...(telemetry.metadata ?? {}), promptId: prompt.id };
+			}
+			opts.prompt = prompt.toString();
+		}
+		opts.experimental_telemetry = telemetry;
+		_args[0] = opts;
+	}
+	`
 	vercelAIPatches := patchModule{
 		Module: "ai",
 		Functions: map[string]patchAction{

From 07cf2b834b4c33db153268cda22464a331095e1d Mon Sep 17 00:00:00 2001
From: Bobby Christopher <b.christopher.3rd@gmail.com>
Date: Thu, 11 Sep 2025 09:54:47 -0400
Subject: [PATCH 2/3] cmd: add eval create and pull commands

---
 cmd/eval.go | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 189 insertions(+)
 create mode 100644 cmd/eval.go

diff --git a/cmd/eval.go b/cmd/eval.go
new file mode 100644
index 00000000..8a380b90
--- /dev/null
+++ b/cmd/eval.go
@@ -0,0 +1,189 @@
+package cmd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"syscall"
+
+	"github.com/agentuity/cli/internal/errsystem"
+	"github.com/agentuity/cli/internal/project"
+	"github.com/agentuity/cli/internal/util"
+	"github.com/agentuity/go-common/env"
+	"github.com/agentuity/go-common/logger"
+	"github.com/agentuity/go-common/sys"
+	"github.com/agentuity/go-common/tui"
+	"github.com/charmbracelet/huh/spinner"
+	"github.com/spf13/cobra"
+)
+
+type EvalResponse = project.Response[string]
+
+func CreateEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, evalData string) error {
+	client := util.NewAPIClient(ctx, logger, baseUrl, token)
+
+	var resp EvalResponse
+	payload := map[string]any{
+		"data": evalData,
+	}
+	if err := client.Do("POST", fmt.Sprintf("/cli/project/%s/evaluations", projectId), payload, &resp); err != nil {
+		return fmt.Errorf("error creating evaluation: %w", err)
+	}
+	if !resp.Success {
+		return fmt.Errorf("failed to create evaluation: %s", resp.Message)
+	}
+	return nil
+}
+
+func PullEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, evalId string) (string, error) {
+	client := util.NewAPIClient(ctx, logger, baseUrl, token)
+
+	var resp EvalResponse
+	if err := client.Do("GET", fmt.Sprintf("/cli/project/%s/evaluations/%s", projectId, evalId), nil, &resp); err != nil {
+		return "", fmt.Errorf("error pulling evaluation: %w", err)
+	}
+	if !resp.Success {
+		return "", fmt.Errorf("failed to pull evaluation: %s", resp.Message)
+	}
+	return resp.Data, nil
+}
+
+var evalCmd = &cobra.Command{
+	Use:   "eval",
+	Short: "Evaluation related commands",
+	Long: `Evaluation related commands for managing evaluations and test data.
+
+Use the subcommands to create and pull evaluation data to/from the cloud.`,
+	Run: func(cmd *cobra.Command, args []string) {
+		cmd.Help()
+	},
+}
+
+var evalCreateCmd = &cobra.Command{
+	Use:   "create [file]",
+	Short: "Create evaluation data in the cloud",
+	Long: `Create evaluation data in the cloud for your project.
+
+Arguments:
+  [file]    Optional path to evaluation file (defaults to evals.json)
+
+Flags:
+  --force     Don't prompt for confirmation
+
+Examples:
+  agentuity eval create
+  agentuity eval create evals.json
+  agentuity eval create --force my-evals.json`,
+	Run: func(cmd *cobra.Command, args []string) {
+		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
+		defer cancel()
+
+		logger := env.NewLogger(cmd)
+		context := project.EnsureProject(ctx, cmd)
+		dir := context.Dir
+		apiUrl := context.APIURL
+		apiKey := context.Token
+		theproject := context.Project
+
+		force, _ := cmd.Flags().GetBool("force")
+
+		// Determine eval file path
+		var evalFile string
+		if len(args) > 0 {
+			evalFile = args[0]
+		} else {
+			evalFile = filepath.Join(dir, "evals.json")
+		}
+
+		// Check if file exists
+		if !sys.Exists(evalFile) {
+			errsystem.New(errsystem.ErrInvalidCommandFlag, fmt.Errorf("evaluation file not found: %s", evalFile)).ShowErrorAndExit()
+		}
+
+		// Read evaluation data
+		evalData, err := os.ReadFile(evalFile)
+		if err != nil {
+			errsystem.New(errsystem.ErrInvalidCommandFlag, err, errsystem.WithUserMessage("Failed to read evaluation file")).ShowErrorAndExit()
+		}
+
+		// Validate JSON
+		var evals interface{}
+		if err := json.Unmarshal(evalData, &evals); err != nil {
+			errsystem.New(errsystem.ErrInvalidCommandFlag, err, errsystem.WithUserMessage("Invalid JSON in evaluation file")).ShowErrorAndExit()
+		}
+
+		// Confirm create unless force flag is set
+		if !force {
+			if !tui.Ask(logger, fmt.Sprintf("Create evaluation from %s in the cloud?", evalFile), false) {
+				tui.ShowWarning("cancelled")
+				return
+			}
+		}
+
+		action := func() {
+			err := CreateEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, string(evalData))
+			if err != nil {
+				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to create evaluation")).ShowErrorAndExit()
+			}
+		}
+
+		spinner.New().Title("Creating evaluation...").Action(action).Run()
+		tui.ShowSuccess("Evaluation created successfully")
+	},
+}
+
+var evalPullCmd = &cobra.Command{
+	Use:   "pull <id>",
+	Short: "Pull evaluation data from the cloud by ID",
+	Long: `Pull evaluation data from the cloud for your project using the evaluation ID.
+
+Arguments:
+  <id>    The evaluation ID to pull
+
+Examples:
+  agentuity eval pull abc123
+  agentuity eval pull def456`,
+	Args: cobra.ExactArgs(1),
+	Run: func(cmd *cobra.Command, args []string) {
+		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
+		defer cancel()
+
+		logger := env.NewLogger(cmd)
+		context := project.EnsureProject(ctx, cmd)
+		apiUrl := context.APIURL
+		apiKey := context.Token
+		theproject := context.Project
+
+		evalId := args[0]
+
+		var evalData string
+		action := func() {
+			var err error
+			evalData, err = PullEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, evalId)
+			if err != nil {
+				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to pull evaluation")).ShowErrorAndExit()
+			}
+		}
+
+		spinner.New().Title("Pulling evaluation...").Action(action).Run()
+
+		// Output to stdout
+		fmt.Println(evalData)
+	},
+}
+
+func init() {
+	rootCmd.AddCommand(evalCmd)
+
+	evalCreateCmd.Flags().Bool("force", !hasTTY, "Don't prompt for confirmation")
+
+	evalCmd.AddCommand(evalCreateCmd)
+	evalCmd.AddCommand(evalPullCmd)
+
+	for _, cmd := range []*cobra.Command{evalCreateCmd, evalPullCmd} {
+		cmd.Flags().StringP("dir", "d", ".", "The directory to the project")
+	}
+}

From 50bfdf914b61ab89a8d28a7d8a71c383cfa41af8 Mon Sep 17 00:00:00 2001
From: Bobby Christopher <b.christopher.3rd@gmail.com>
Date: Mon, 15 Sep 2025 10:39:10 -0400
Subject: [PATCH 3/3] cmd: add template/generative eval creation and write pulled eval code to src/evals

---
 cmd/eval.go | 206 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 161 insertions(+), 45 deletions(-)

diff --git a/cmd/eval.go b/cmd/eval.go
index 8a380b90..800dd0e7 100644
--- a/cmd/eval.go
+++ b/cmd/eval.go
@@ -2,7 +2,6 @@ package cmd
 
 import (
 	"context"
-	"encoding/json"
 	"fmt"
 	"os"
 	"os/signal"
@@ -14,41 +13,78 @@ import (
 	"github.com/agentuity/cli/internal/util"
 	"github.com/agentuity/go-common/env"
 	"github.com/agentuity/go-common/logger"
-	"github.com/agentuity/go-common/sys"
 	"github.com/agentuity/go-common/tui"
 	"github.com/charmbracelet/huh/spinner"
 	"github.com/spf13/cobra"
 )
 
-type EvalResponse = project.Response[string]
+type EvalObject struct {
+	ID          string `json:"id"`
+	Name        string `json:"name"`
+	Description string `json:"description"`
+	ProjectID   string `json:"projectId"`
+	OrgID       string `json:"orgId"`
+}
+
+type EvalPullObject struct {
+	Code        string `json:"code"`
+	ID          string `json:"id"`
+	Name        string `json:"name"`
+	Description string `json:"description"`
+}
+
+type EvalCreateResponse = project.Response[EvalObject]
+type EvalPullResponse = project.Response[EvalPullObject]
+
+func CreateGenerativeEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string) (string, error) {
+	client := util.NewAPIClient(ctx, logger, baseUrl, token)
+
+	var resp EvalCreateResponse
+	payload := map[string]any{
+		"projectId": projectId,
+		"type":      "generative",
+	}
+
+	if err := client.Do("POST", "/cli/eval", payload, &resp); err != nil {
+		return "", fmt.Errorf("error creating generative evaluation: %w", err)
+	}
+	if !resp.Success {
+		return "", fmt.Errorf("failed to create generative evaluation: %s", resp.Message)
+	}
+	return resp.Data.ID, nil
+}
 
-func CreateEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, evalData string) error {
+func CreateTemplateEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, name string, description string) (string, error) {
 	client := util.NewAPIClient(ctx, logger, baseUrl, token)
 
-	var resp EvalResponse
+	var resp EvalCreateResponse
 	payload := map[string]any{
-		"data": evalData,
+		"projectId":   projectId,
+		"name":        name,
+		"description": description,
+		"type":        "template",
 	}
-	if err := client.Do("POST", fmt.Sprintf("/cli/project/%s/evaluations", projectId), payload, &resp); err != nil {
-		return fmt.Errorf("error creating evaluation: %w", err)
+
+	if err := client.Do("POST", "/cli/eval", payload, &resp); err != nil {
+		return "", fmt.Errorf("error creating template evaluation: %w", err)
 	}
 	if !resp.Success {
-		return fmt.Errorf("failed to create evaluation: %s", resp.Message)
+		return "", fmt.Errorf("failed to create template evaluation: %s", resp.Message)
 	}
-	return nil
+	return resp.Data.ID, nil
 }
 
-func PullEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, evalId string) (string, error) {
+func PullEvaluation(ctx context.Context, logger logger.Logger, baseUrl string, token string, evalId string) (*EvalPullObject, error) {
 	client := util.NewAPIClient(ctx, logger, baseUrl, token)
 
-	var resp EvalResponse
-	if err := client.Do("GET", fmt.Sprintf("/cli/project/%s/evaluations/%s", projectId, evalId), nil, &resp); err != nil {
-		return "", fmt.Errorf("error pulling evaluation: %w", err)
+	var resp EvalPullResponse
+	if err := client.Do("GET", fmt.Sprintf("/cli/eval/pull/%s", evalId), nil, &resp); err != nil {
+		return nil, fmt.Errorf("error pulling evaluation: %w", err)
 	}
 	if !resp.Success {
-		return "", fmt.Errorf("failed to pull evaluation: %s", resp.Message)
+		return nil, fmt.Errorf("failed to pull evaluation: %s", resp.Message)
 	}
-	return resp.Data, nil
+	return &resp.Data, nil
 }
 
 var evalCmd = &cobra.Command{
@@ -63,20 +99,21 @@ Use the subcommands to create and pull evaluation data to/from the cloud.`,
 }
 
 var evalCreateCmd = &cobra.Command{
-	Use:   "create [file]",
+	Use:   "create [name] [description]",
 	Short: "Create evaluation data in the cloud",
 	Long: `Create evaluation data in the cloud for your project.
 
 Arguments:
-  [file]    Optional path to evaluation file (defaults to evals.json)
+  [name]        Optional name for the evaluation
+  [description] Optional description for the evaluation
 
 Flags:
   --force     Don't prompt for confirmation
 
 Examples:
   agentuity eval create
-  agentuity eval create evals.json
-  agentuity eval create --force my-evals.json`,
+  agentuity eval create "My Eval" "Description of evaluation"
+  agentuity eval create --force "My Eval" "Description"`,
 	Run: func(cmd *cobra.Command, args []string) {
 		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
 		defer cancel()
@@ -90,48 +127,121 @@ Examples:
 
 		force, _ := cmd.Flags().GetBool("force")
 
-		// Determine eval file path
-		var evalFile string
-		if len(args) > 0 {
-			evalFile = args[0]
+		// First, get the evaluation type
+		var evalType string
+		if !tui.HasTTY {
+			// Default to template when no TTY
+			evalType = "template"
 		} else {
-			evalFile = filepath.Join(dir, "evals.json")
+			evalType = tui.Select(logger, "What type of evaluation would you like to create?", "Choose between template-based or generative evaluation", []tui.Option{
+				{Text: tui.PadRight("Template", 20, " ") + tui.Muted("Use a predefined regex evaluation template"), ID: "template"},
+				{Text: tui.PadRight("Generative", 20, " ") + tui.Muted("AI will generate custom evaluation code"), ID: "generative"},
+			})
 		}
 
-		// Check if file exists
-		if !sys.Exists(evalFile) {
-			errsystem.New(errsystem.ErrInvalidCommandFlag, fmt.Errorf("evaluation file not found: %s", evalFile)).ShowErrorAndExit()
-		}
+		var name, description string
 
-		// Read evaluation data
-		evalData, err := os.ReadFile(evalFile)
-		if err != nil {
-			errsystem.New(errsystem.ErrInvalidCommandFlag, err, errsystem.WithUserMessage("Failed to read evaluation file")).ShowErrorAndExit()
-		}
+		// Get name and description only for template type
+		if evalType == "template" {
+			// Get name and description from args or prompt
+			if len(args) > 0 {
+				name = args[0]
+			}
+			if len(args) > 1 {
+				description = args[1]
+			}
+
+			// Interactive flow for name and description
+			if name == "" {
+				if !tui.HasTTY {
+					logger.Fatal("No TTY detected, please specify an evaluation name from the command line")
+				}
+				name = tui.InputWithValidation(logger, "What should we name this evaluation?", "The name helps identify the evaluation", 255, func(name string) error {
+					if name == "" {
+						return fmt.Errorf("evaluation name cannot be empty")
+					}
+					return nil
+				})
+			}
 
-		// Validate JSON
-		var evals interface{}
-		if err := json.Unmarshal(evalData, &evals); err != nil {
-			errsystem.New(errsystem.ErrInvalidCommandFlag, err, errsystem.WithUserMessage("Invalid JSON in evaluation file")).ShowErrorAndExit()
+			// The description is optional, so only prompt for it when a terminal is available.
+			if description == "" && tui.HasTTY {
+				description = tui.Input(logger, "How should we describe what this evaluation tests?", "The description is optional but helpful for understanding the purpose of the evaluation")
+			}
 		}
 
 		// Confirm create unless force flag is set
 		if !force {
-			if !tui.Ask(logger, fmt.Sprintf("Create evaluation from %s in the cloud?", evalFile), false) {
+			var confirmMessage string
+			if evalType == "template" {
+				confirmMessage = fmt.Sprintf("Create template evaluation '%s' in the cloud?", name)
+			} else {
+				confirmMessage = "Create generative evaluation in the cloud?"
+			}
+
+			if !tui.Ask(logger, confirmMessage, false) {
 				tui.ShowWarning("cancelled")
 				return
 			}
 		}
 
+		var evalId string
+		var evalObj *EvalPullObject
 		action := func() {
-			err := CreateEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, string(evalData))
+			var err error
+
+			// Call the appropriate function based on type
+			if evalType == "template" {
+				evalId, err = CreateTemplateEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, name, description)
+			} else {
+				evalId, err = CreateGenerativeEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId)
+			}
+
 			if err != nil {
 				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to create evaluation")).ShowErrorAndExit()
 			}
+
+			// Automatically pull the evaluation data
+			evalObj, err = PullEvaluation(ctx, logger, apiUrl, apiKey, evalId)
+			if err != nil {
+				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to pull evaluation data")).ShowErrorAndExit()
+			}
 		}
 
-		spinner.New().Title("Creating evaluation...").Action(action).Run()
-		tui.ShowSuccess("Evaluation created successfully")
+		runErr := spinner.New().Title("Creating evaluation...").Action(action).Run()
+		if evalObj == nil {
+			// The action never finished (e.g. the spinner could not start), so
+			// fail here instead of dereferencing a nil result below.
+			if runErr == nil {
+				runErr = fmt.Errorf("evaluation was not created")
+			}
+			errsystem.New(errsystem.ErrApiRequest, runErr, errsystem.WithUserMessage("Failed to create evaluation")).ShowErrorAndExit()
+		}
+
+		// Write code to file. The name comes from the API response, so keep only
+		// its final path element to stay inside the evals directory.
+		filename := filepath.Base(evalObj.Name) + ".ts"
+		evalsDir := filepath.Join(dir, "src", "evals")
+
+		// Create the evals directory if it doesn't exist
+		if err := os.MkdirAll(evalsDir, 0755); err != nil {
+			errsystem.New(errsystem.ErrCreateDirectory, err, errsystem.WithUserMessage("Failed to create evals directory")).ShowErrorAndExit()
+		}
+
+		filePath := filepath.Join(evalsDir, filename)
+		if err := os.WriteFile(filePath, []byte(evalObj.Code), 0644); err != nil {
+			errsystem.New(errsystem.ErrOpenFile, err, errsystem.WithUserMessage("Failed to write evaluation code to file")).ShowErrorAndExit()
+		}
+
+		if evalType == "template" {
+			tui.ShowSuccess("Template evaluation '%s' created successfully with ID: %s", name, evalId)
+		} else {
+			tui.ShowSuccess("Generative evaluation created successfully with ID: %s", evalId)
+		}
+
+		tui.ShowSuccess("Evaluation code written to: %s", filePath)
+		fmt.Println("\nEvaluation code:")
+		fmt.Println(evalObj.Code)
 	},
 }
 
@@ -153,16 +253,16 @@ Examples:
 
 		logger := env.NewLogger(cmd)
 		context := project.EnsureProject(ctx, cmd)
+		dir := context.Dir
 		apiUrl := context.APIURL
 		apiKey := context.Token
-		theproject := context.Project
 
 		evalId := args[0]
 
-		var evalData string
+		var evalObj *EvalPullObject
 		action := func() {
 			var err error
-			evalData, err = PullEvaluation(ctx, logger, apiUrl, apiKey, theproject.ProjectId, evalId)
+			evalObj, err = PullEvaluation(ctx, logger, apiUrl, apiKey, evalId)
 			if err != nil {
 				errsystem.New(errsystem.ErrApiRequest, err, errsystem.WithUserMessage("Failed to pull evaluation")).ShowErrorAndExit()
 			}
@@ -170,8 +270,33 @@ Examples:
 
-		spinner.New().Title("Pulling evaluation...").Action(action).Run()
+		runErr := spinner.New().Title("Pulling evaluation...").Action(action).Run()
+		if evalObj == nil {
+			// The action never finished (e.g. the spinner could not start), so
+			// fail here instead of dereferencing a nil result below.
+			if runErr == nil {
+				runErr = fmt.Errorf("evaluation was not pulled")
+			}
+			errsystem.New(errsystem.ErrApiRequest, runErr, errsystem.WithUserMessage("Failed to pull evaluation")).ShowErrorAndExit()
+		}
 
+		// Write code to file. The name comes from the API response, so keep only
+		// its final path element to stay inside the evals directory.
+		filename := filepath.Base(evalObj.Name) + ".ts"
+		evalsDir := filepath.Join(dir, "src", "evals")
+
+		// Create the evals directory if it doesn't exist
+		if err := os.MkdirAll(evalsDir, 0755); err != nil {
+			errsystem.New(errsystem.ErrCreateDirectory, err, errsystem.WithUserMessage("Failed to create evals directory")).ShowErrorAndExit()
+		}
+
+		filePath := filepath.Join(evalsDir, filename)
+		if err := os.WriteFile(filePath, []byte(evalObj.Code), 0644); err != nil {
+			errsystem.New(errsystem.ErrOpenFile, err, errsystem.WithUserMessage("Failed to write evaluation code to file")).ShowErrorAndExit()
+		}
+
+		tui.ShowSuccess("Evaluation code written to: %s", filePath)
+
 		// Output to stdout
-		fmt.Println(evalData)
+		fmt.Println(evalObj.Code)
 	},
 }