agentuity · potofpie · Oct 22, 2025 · Oct 14, 2025 · Oct 14, 2025 · Oct 15, 2025
diff --git a/cmd/cloud.go b/cmd/cloud.go
@@ -18,6 +18,7 @@ import (
 	"syscall"
 	"time"
 
+	"github.com/agentuity/cli/internal/bundler/prompts"
 	"github.com/agentuity/cli/internal/deployer"
 	"github.com/agentuity/cli/internal/envutil"
 	"github.com/agentuity/cli/internal/errsystem"
@@ -73,6 +74,21 @@ type startAgent struct {
 	Remove bool `json:"remove,omitempty"`
 }
 
+// PromptVariable describes a single template variable of a prompt as it is
+// serialized in the deploy start request.
+type PromptVariable struct {
+	// Name is the variable's identifier; it is always serialized.
+	Name     string `json:"name"`
+	// Required is omitted from the JSON payload when false.
+	Required bool   `json:"required,omitempty"`
+	// Default is omitted from the JSON payload when empty.
+	Default  string `json:"default,omitempty"`
+}
+
+// DeployPrompt is the JSON representation of one prompt definition sent with
+// a deployment. Slug and Name are always serialized; the pointer fields and
+// Variables are left out of the payload when nil/empty (omitempty).
+type DeployPrompt struct {
+	Slug        string           `json:"slug"`
+	Name        string           `json:"name"`
+	// System is the system prompt text; nil means "not provided".
+	System      *string          `json:"system,omitempty"`
+	// Prompt is the user prompt text; nil means "not provided".
+	Prompt      *string          `json:"prompt,omitempty"`
+	// Variables lists the template variables associated with this prompt.
+	Variables   []PromptVariable `json:"variables,omitempty"`
+	// Description is an optional human-readable description.
+	Description *string          `json:"description,omitempty"`
+}
+
 type startRequest struct {
 	Agents         []startAgent       `json:"agents"`
 	Resources      *Resources         `json:"resources,omitempty"`
@@ -81,6 +97,7 @@ type startRequest struct {
 	TagDescription string             `json:"description,omitempty"`
 	TagMessage     string             `json:"message,omitempty"`
 	UsePrivateKey  bool               `json:"usePrivateKey,omitempty"`
+	Prompts        []DeployPrompt     `json:"prompts,omitempty"`
 }
 
 func ShowNewProjectImport(ctx context.Context, logger logger.Logger, cmd *cobra.Command, apiUrl string, apikey string, projectId string, project *project.Project, dir string, isImport bool) {
@@ -425,6 +442,18 @@ Examples:
 		startRequest.TagMessage = message
 		startRequest.UsePrivateKey = true
 
+		// Collect prompts data if prompts feature flag is enabled
+		promptsEvalsFF := CheckFeatureFlag(cmd, FeaturePromptsEvals, "enable-prompts-evals")
+		if promptsEvalsFF {
+			// Named deployPrompts so the imported prompts package is not
+			// shadowed for the remainder of this scope.
+			deployPrompts, err := collectPromptsData(logger, dir)
+			if err != nil {
+				// Collection stays best-effort (the deployment continues), but
+				// the failure is reported at error level: at debug level the
+				// user would deploy without prompts and never learn why.
+				logger.Error("Failed to collect prompts data: %v", err)
+			} else {
+				startRequest.Prompts = deployPrompts
+				logger.Debug("Collected %d prompts for deployment", len(deployPrompts))
+			}
+		}
+
 		// Start deployment
 		if err := client.Do("PUT", fmt.Sprintf("/cli/deploy/start/%s%s", theproject.ProjectId, deploymentId), startRequest, &startResponse); err != nil {
 			errsystem.New(errsystem.ErrDeployProject, err,
@@ -1028,6 +1057,98 @@ Examples:
 	},
 }
 
+// collectPromptsData reads every prompt file found under dir, parses it and
+// converts the prompts it defines into the DeployPrompt payload format.
+//
+// It returns (nil, nil) when the project has no prompt files, and an error
+// (with no partial result) as soon as one file cannot be read or parsed.
+//
+// The variables of a prompt are the union of its system-template and
+// prompt-template variables, keyed by name and kept in first-seen order. When
+// the same name occurs more than once the entry is marked required if any
+// occurrence is required, and the first non-empty default value wins.
+func collectPromptsData(logger logger.Logger, dir string) ([]DeployPrompt, error) {
+	promptFiles := prompts.FindAllPromptFiles(dir)
+	if len(promptFiles) == 0 {
+		logger.Debug("No prompt files found")
+		return nil, nil
+	}
+
+	logger.Debug("Found %d prompt files: %v", len(promptFiles), promptFiles)
+
+	// optional maps an empty string to nil so that `omitempty` really drops
+	// the field. Because s is a by-value parameter the returned pointer refers
+	// to a private copy and never aliases the range variable (which is shared
+	// between iterations on toolchains older than Go 1.22).
+	optional := func(s string) *string {
+		if s == "" {
+			return nil
+		}
+		return &s
+	}
+
+	var allPrompts []DeployPrompt
+
+	for _, promptFile := range promptFiles {
+		data, err := os.ReadFile(promptFile)
+		if err != nil {
+			return nil, fmt.Errorf("failed to read %s: %w", promptFile, err)
+		}
+
+		promptsList, err := prompts.ParsePromptsYAML(data)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse %s: %w", promptFile, err)
+		}
+
+		for _, prompt := range promptsList {
+			var variables []PromptVariable
+			// position maps a variable name to its index in variables so a
+			// repeated name is merged in O(1) instead of rescanning the slice.
+			position := make(map[string]int)
+			merge := func(name string, required bool, defaultValue string) {
+				i, seen := position[name]
+				if !seen {
+					position[name] = len(variables)
+					variables = append(variables, PromptVariable{
+						Name:     name,
+						Required: required,
+						Default:  defaultValue,
+					})
+					return
+				}
+				// Keep the more restrictive / more informative definition.
+				if required {
+					variables[i].Required = true
+				}
+				if variables[i].Default == "" {
+					variables[i].Default = defaultValue
+				}
+			}
+
+			// Ranging over a nil slice is a no-op, so no nil guards are needed.
+			for _, v := range prompt.SystemTemplate.Variables {
+				merge(v.Name, v.IsRequired, v.DefaultValue)
+			}
+			for _, v := range prompt.PromptTemplate.Variables {
+				merge(v.Name, v.IsRequired, v.DefaultValue)
+			}
+
+			allPrompts = append(allPrompts, DeployPrompt{
+				Slug:        prompt.Slug,
+				Name:        prompt.Name,
+				System:      optional(prompt.System),
+				Prompt:      optional(prompt.Prompt),
+				Variables:   variables,
+				Description: optional(prompt.Description),
+			})
+		}
+
+		logger.Debug("Parsed %d prompts from %s", len(promptsList), promptFile)
+	}
+
+	logger.Debug("Total prompts collected: %d", len(allPrompts))
+	return allPrompts, nil
+}
+
 func init() {
 	rootCmd.AddCommand(cloudCmd)
 	rootCmd.AddCommand(cloudDeployCmd)

diff --git a/cmd/dev.go b/cmd/dev.go
@@ -49,7 +49,6 @@ Examples:
 		apiUrl := urls.API
 		appUrl := urls.App
 		gravityUrl := urls.Gravity
-
 		noBuild, _ := cmd.Flags().GetBool("no-build")
 
 		promptsEvalsFF := CheckFeatureFlag(cmd, FeaturePromptsEvals, "enable-prompts-evals")
@@ -159,7 +158,7 @@ Examples:
 				if errors.Is(err, context.Canceled) {
 					return
 				}
-				log.Fatal("failed to start devmode connection: %s", err)
+				log.Error("failed to start live dev connection: %s", err)
 				return
 			}
 		}

diff --git a/cmd/eval.go b/cmd/eval.go
@@ -0,0 +1,186 @@
+package cmd
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"strings"
+	"syscall"
+
+	"github.com/agentuity/cli/internal/errsystem"
+	"github.com/agentuity/cli/internal/eval"
+	"github.com/agentuity/cli/internal/project"
+	"github.com/agentuity/cli/internal/util"
+	"github.com/agentuity/go-common/env"
+	"github.com/agentuity/go-common/logger"
+	"github.com/agentuity/go-common/tui"
+	"github.com/spf13/cobra"
+)
+
+// evalCmd is the parent "eval" command. It carries no behavior of its own:
+// when invoked directly it prints its help text.
+var evalCmd = &cobra.Command{
+	Use:   "eval",
+	Short: "Evaluation related commands",
+	Run: func(c *cobra.Command, _ []string) {
+		c.Help()
+	},
+}
+
+// getEvalInfoFlow fills in the eval name and description, prompting the user
+// for whichever value was not supplied, and returns the resulting pair.
+//
+// The name is mandatory: when it is empty and no TTY is available,
+// logger.Fatal is called instead of prompting. The interactive prompt rejects
+// empty and whitespace-only names. The description is optional, so it is only
+// asked for when a TTY is available; in non-interactive runs an empty
+// description is returned unchanged.
+func getEvalInfoFlow(logger logger.Logger, name string, description string) (string, string) {
+	if name == "" {
+		if !tui.HasTTY {
+			logger.Fatal("No TTY detected, please specify an eval name from the command line")
+		}
+		name = tui.InputWithValidation(logger, "What should we name the evaluation?", "The name of the eval helps identify its purpose", 255, func(value string) error {
+			if strings.TrimSpace(value) == "" {
+				return fmt.Errorf("Eval name cannot be empty")
+			}
+			return nil
+		})
+	}
+
+	// Never open an interactive prompt without a terminal: the description is
+	// optional, so scripted invocations simply proceed without one.
+	if description == "" && tui.HasTTY {
+		description = tui.Input(logger, "How should we describe what the "+name+" eval does?", "The description of the eval is optional but helpful for understanding its purpose")
+	}
+
+	return name, description
+}
+
+// generateEvalFile writes a TypeScript eval scaffold to
+// <projectDir>/src/evals/<slug>.ts, creating the evals directory if needed.
+//
+// evalID, slug, name and description are embedded in the exported metadata
+// object; name and description are additionally placed in the doc comment of
+// the generated function. All values are escaped for the context they are
+// written into, so quotes, backslashes, line breaks or "*/" in user-supplied
+// text cannot produce an invalid source file.
+//
+// It returns an error if the directory cannot be created, if the target file
+// already exists, or if the file cannot be written.
+func generateEvalFile(logger logger.Logger, projectDir string, evalID string, slug string, name string, description string) error {
+	// Always generate TypeScript files for evals
+	ext := ".ts"
+
+	// Create evals directory if it doesn't exist
+	evalsDir := filepath.Join(projectDir, "src", "evals")
+	if err := os.MkdirAll(evalsDir, 0755); err != nil {
+		return fmt.Errorf("failed to create evals directory: %w", err)
+	}
+
+	// Generate file path
+	filename := filepath.Join(evalsDir, slug+ext)
+
+	// Refuse to overwrite an eval the user may already have edited
+	if util.Exists(filename) {
+		return fmt.Errorf("eval file already exists: %s", filename)
+	}
+
+	// Generate TypeScript content with metadata
+	content := fmt.Sprintf(`import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';
+
+export const metadata = {
+  id: '%s',
+  slug: '%s',
+  name: '%s',
+  description: '%s'
+};
+
+/**
+ * %s
+ * %s
+ */
+export default async function evaluate(
+  _ctx: EvalContext,
+  req: EvalRequest,
+  res: EvalResponse
+) {
+  const { input, output } = req;
+
+  // TODO: Implement your evaluation logic here
+  // Example: Score the output based on some criteria
+
+  const score = 0.8; // Replace with your actual scoring logic
+  const metadata = {
+    reasoning: 'Replace with your evaluation reasoning'
+  };
+
+  res.score(score, metadata);
+}
+`,
+		escapeTSString(evalID),
+		escapeTSString(slug),
+		escapeTSString(name),
+		escapeTSString(description),
+		escapeTSComment(name),
+		escapeTSComment(description),
+	)
+
+	// Write file
+	if err := os.WriteFile(filename, []byte(content), 0644); err != nil {
+		return fmt.Errorf("failed to write eval file: %w", err)
+	}
+
+	logger.Debug("Created eval file: %s", filename)
+	return nil
+}
+
+// escapeTSString escapes s for use inside a single-quoted TypeScript string
+// literal: backslashes and single quotes are backslash-escaped and line
+// breaks become the \r / \n escape sequences.
+func escapeTSString(s string) string {
+	return strings.NewReplacer(`\`, `\\`, `'`, `\'`, "\r", `\r`, "\n", `\n`).Replace(s)
+}
+
+// escapeTSComment makes s safe inside a /** ... */ block comment: a "*/"
+// sequence is broken up so it cannot close the comment early, and each line
+// break is followed by the " * " continuation prefix.
+func escapeTSComment(s string) string {
+	return strings.NewReplacer("*/", `*\/`, "\r\n", "\n * ", "\n", "\n * ").Replace(s)
+}
+
+// evalCreateCmd implements "eval create [name] [description]".
+//
+// It resolves the eval name and description (from the arguments or by
+// prompting), derives a slug from the name, registers the eval through the
+// API and then writes a TypeScript scaffold to src/evals/<slug>.ts inside the
+// project directory. The result is reported as text, or as a JSON object when
+// --format json is given.
+var evalCreateCmd = &cobra.Command{
+	Use:     "create [name] [description]",
+	Short:   "Create a new evaluation function",
+	Aliases: []string{"new"},
+	Args:    cobra.MaximumNArgs(2),
+	Run: func(cmd *cobra.Command, args []string) {
+		ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
+		defer cancel()
+		logger := env.NewLogger(cmd)
+		theproject := project.EnsureProject(ctx, cmd)
+		apikey := theproject.Token
+		urls := util.GetURLs(logger)
+		apiUrl := urls.API
+
+		var name string
+		var description string
+
+		if len(args) > 0 {
+			name = args[0]
+		}
+
+		if len(args) > 1 {
+			description = args[1]
+		}
+
+		name, description = getEvalInfoFlow(logger, name, description)
+
+		// Generate slug from name
+		isPython := theproject.Project.Bundler.Language == "python"
+		slug := util.SafeProjectFilename(strings.ToLower(name), isPython)
+
+		// Detect a file name collision before talking to the API. Otherwise
+		// the eval would be registered remotely and the command would then
+		// fail while writing the file, leaving a remote eval with no local
+		// counterpart.
+		evalFile := filepath.Join(theproject.Dir, "src", "evals", slug+".ts")
+		if util.Exists(evalFile) {
+			errsystem.New(errsystem.ErrOpenFile, fmt.Errorf("eval file already exists: %s", evalFile), errsystem.WithContextMessage("Failed to create eval file")).ShowErrorAndExit()
+		}
+
+		var evalID string
+
+		action := func() {
+			// Create eval via API
+			var evalErr error
+			evalID, evalErr = eval.CreateEval(ctx, logger, apiUrl, apikey, theproject.Project.ProjectId, slug, name, description)
+			if evalErr != nil {
+				errsystem.New(errsystem.ErrApiRequest, evalErr, errsystem.WithContextMessage("Failed to create eval")).ShowErrorAndExit()
+			}
+
+			logger.Debug("Created eval with ID: %s", evalID)
+
+			// Generate eval file (always TypeScript) with the real ID from API
+			if err := generateEvalFile(logger, theproject.Dir, evalID, slug, name, description); err != nil {
+				errsystem.New(errsystem.ErrOpenFile, err, errsystem.WithContextMessage("Failed to create eval file")).ShowErrorAndExit()
+			}
+		}
+
+		tui.ShowSpinner("Creating evaluation ...", action)
+
+		format, _ := cmd.Flags().GetString("format")
+		if format == "json" {
+			result := map[string]string{
+				"id":          evalID,
+				"slug":        slug,
+				"name":        name,
+				"description": description,
+			}
+			// A failed write to stdout must not look like success to a script
+			// that consumes the JSON output.
+			if err := json.NewEncoder(os.Stdout).Encode(result); err != nil {
+				logger.Fatal("failed to encode eval result: %s", err)
+			}
+		} else {
+			tui.ShowSuccess("Evaluation created successfully")
+			fmt.Printf("\nFile created: %s\n", tui.Muted(fmt.Sprintf("src/evals/%s.ts", slug)))
+		}
+	},
+}
+
+// init attaches the eval command tree to the root command and declares the
+// flags that every eval subcommand accepts.
+func init() {
+	rootCmd.AddCommand(evalCmd)
+	evalCmd.AddCommand(evalCreateCmd)
+
+	// Subcommands that take the common --dir / --format flags.
+	withSharedFlags := []*cobra.Command{evalCreateCmd}
+	for i := range withSharedFlags {
+		flags := withSharedFlags[i].Flags()
+		flags.StringP("dir", "d", "", "The project directory")
+		flags.String("format", "text", "The format to use for the output. Can be either 'text' or 'json'")
+	}
+}
diff --git a/error_codes.yaml b/error_codes.yaml
@@ -91,3 +91,9 @@ errors:
 
   - code: CLI-0029
     message: Failed to retrieve devmode endpoint
+
+  - code: CLI-0030
+    message: Breaking change migration required
+
+  - code: CLI-0031
+    message: SDK update required