Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/docs/04-command-line-reference/gptscript.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ gptscript [flags] PROGRAM_FILE [INPUT...]
--confirm Prompt before running potentially dangerous commands ($GPTSCRIPT_CONFIRM)
--credential-context strings Context name(s) in which to store credentials ($GPTSCRIPT_CREDENTIAL_CONTEXT)
--credential-override strings Credentials to override (ex: --credential-override github.com/example/cred-tool:API_TOKEN=1234) ($GPTSCRIPT_CREDENTIAL_OVERRIDE)
--dataset-tool-repo string The repo to use for dataset tools ($GPTSCRIPT_DATASET_TOOL_REPO) (default "github.com/gptscript-ai/datasets")
--debug Enable debug logging ($GPTSCRIPT_DEBUG)
--debug-messages Enable logging of chat completion calls ($GPTSCRIPT_DEBUG_MESSAGES)
--default-model string Default LLM model to use ($GPTSCRIPT_DEFAULT_MODEL) (default "gpt-4o")
Expand Down
2 changes: 2 additions & 0 deletions pkg/cli/gptscript.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ type GPTScript struct {
SaveChatStateFile string `usage:"A file to save the chat state to so that a conversation can be resumed with --chat-state" local:"true"`
DefaultModelProvider string `usage:"Default LLM model provider to use, this will override OpenAI settings"`
GithubEnterpriseHostname string `usage:"The host name for a Github Enterprise instance to enable for remote loading" local:"true"`
DatasetToolRepo string `usage:"The repo to use for dataset tools" default:"github.com/gptscript-ai/datasets" local:"true"`

readData []byte
}
Expand Down Expand Up @@ -146,6 +147,7 @@ func (r *GPTScript) NewGPTScriptOpts() (gptscript.Options, error) {
Workspace: r.Workspace,
DisablePromptServer: r.UI,
DefaultModelProvider: r.DefaultModelProvider,
DatasetToolRepo: r.DatasetToolRepo,
}

if r.Confirm {
Expand Down
7 changes: 7 additions & 0 deletions pkg/gptscript/gptscript.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ import (

var log = mvl.Package()

// defaultDatasetToolRepo is the dataset tool repo that Complete falls back to
// when no Options value sets DatasetToolRepo.
const defaultDatasetToolRepo = "github.com/gptscript-ai/datasets"

type GPTScript struct {
Registry *llm.Registry
Runner *runner.Runner
Expand All @@ -51,6 +53,7 @@ type Options struct {
CredentialContexts []string
Quiet *bool
Workspace string
DatasetToolRepo string
DisablePromptServer bool
Env []string
}
Expand All @@ -66,6 +69,7 @@ func Complete(opts ...Options) Options {
result.CredentialContexts = opt.CredentialContexts
result.Quiet = types.FirstSet(opt.Quiet, result.Quiet)
result.Workspace = types.FirstSet(opt.Workspace, result.Workspace)
result.DatasetToolRepo = types.FirstSet(opt.DatasetToolRepo, result.DatasetToolRepo)
result.Env = append(result.Env, opt.Env...)
result.DisablePromptServer = types.FirstSet(opt.DisablePromptServer, result.DisablePromptServer)
result.DefaultModelProvider = types.FirstSet(opt.DefaultModelProvider, result.DefaultModelProvider)
Expand All @@ -80,6 +84,9 @@ func Complete(opts ...Options) Options {
if len(result.CredentialContexts) == 0 {
result.CredentialContexts = []string{credentials.DefaultCredentialContext}
}
if result.DatasetToolRepo == "" {
result.DatasetToolRepo = defaultDatasetToolRepo
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is needed given that you have a default set.

I do believe it is needed here, though, when running the SDK server in embedded mode:

func complete(opts ...Options) Options {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. This is fixed


return result
}
Expand Down
329 changes: 329 additions & 0 deletions pkg/sdkserver/datasets.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
package sdkserver

import (
"encoding/json"
"fmt"
"net/http"

gcontext "github.com/gptscript-ai/gptscript/pkg/context"
"github.com/gptscript-ai/gptscript/pkg/gptscript"
"github.com/gptscript-ai/gptscript/pkg/loader"
)

// datasetRequest is the JSON body shared by all dataset endpoints.
type datasetRequest struct {
	// Input is the raw input string handed to the dataset tool. Most handlers
	// additionally parse it as a JSON object of tool arguments.
	Input string `json:"input"`
	// Workspace is the workspace the dataset tool operates on. Always required.
	Workspace string `json:"workspace"`
	// DatasetToolRepo optionally overrides the server-wide dataset tool repo.
	DatasetToolRepo string `json:"datasetToolRepo"`
}

// validate reports an error when a mandatory field is missing.
//
// Workspace must always be set. Input is only checked when requireInput is
// true. The workspace check takes precedence over the input check.
func (r datasetRequest) validate(requireInput bool) error {
	switch {
	case r.Workspace == "":
		return fmt.Errorf("workspace is required")
	case requireInput && r.Input == "":
		return fmt.Errorf("input is required")
	default:
		return nil
	}
}

// opts derives the gptscript options for a single dataset request.
//
// Cache, Monitor and Runner are copied from the server-wide options o, and
// Workspace always comes from the request. DatasetToolRepo comes from the
// request when it is non-empty and from o otherwise.
func (r datasetRequest) opts(o gptscript.Options) gptscript.Options {
	repo := o.DatasetToolRepo
	if r.DatasetToolRepo != "" {
		repo = r.DatasetToolRepo
	}

	return gptscript.Options{
		Cache:           o.Cache,
		Monitor:         o.Monitor,
		Runner:          o.Runner,
		DatasetToolRepo: repo,
		Workspace:       r.Workspace,
	}
}

// listDatasets handles a request to list the datasets in a workspace.
//
// The JSON request body is decoded into a datasetRequest. Workspace is
// required; input is optional. The "List Datasets" tool is loaded from the
// effective dataset tool repo and run with the request input, and the result
// is passed to writeResponse under the "stdout" key.
//
// Decode and validation failures are reported with http.StatusBadRequest.
// Failures to initialize gptscript, load the program, or run it are reported
// with http.StatusInternalServerError.
func (s *server) listDatasets(w http.ResponseWriter, r *http.Request) {
	logger := gcontext.GetLogger(r.Context())

	var req datasetRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to decode request body: %w", err))
		return
	}

	if err := req.validate(false); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Resolve the options once so that a per-request datasetToolRepo override
	// applies to the tool that is loaded below, not only to the gptscript
	// instance. Previously the server-wide repo was always used here.
	opts := req.opts(s.gptscriptOpts)

	// NOTE(review): g is never released in this handler; confirm whether
	// gptscript.GPTScript requires an explicit Close.
	g, err := gptscript.New(r.Context(), opts)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err))
		return
	}

	prg, err := loader.Program(r.Context(), "List Datasets from "+opts.DatasetToolRepo, "", loader.Options{
		Cache: g.Cache,
	})
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to load program: %w", err))
		return
	}

	result, err := g.Run(r.Context(), prg, s.gptscriptOpts.Env, req.Input)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to run program: %w", err))
		return
	}

	writeResponse(logger, w, map[string]any{"stdout": result})
}

// createDatasetArgs is the JSON object that createDataset expects to find in
// datasetRequest.Input.
type createDatasetArgs struct {
// Name is the name of the dataset to create; validate rejects an empty value.
Name string `json:"dataset_name"`
// Description is not checked by validate.
Description string `json:"dataset_description"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: you have camelCase and snake_case in your json tags. Can this be standardized?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This went the opposite way I expected. Is there a reason why you are doing snake_case? I think most of our stuff uses camelCase.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lol. I honestly have no idea. I'll go change it all to camelCase.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Sorry for the delay. Got stuck working on a test for another repo

}

// validate reports an error when the dataset name is missing.
func (a createDatasetArgs) validate() error {
	if a.Name != "" {
		return nil
	}
	return fmt.Errorf("dataset_name is required")
}

// createDataset handles a request to create a new dataset in a workspace.
//
// The JSON request body is decoded into a datasetRequest; both workspace and
// input are required. Input must itself be a JSON object matching
// createDatasetArgs (dataset_name is required). The "Create Dataset" tool is
// loaded from the effective dataset tool repo and run with the raw input, and
// the result is passed to writeResponse under the "stdout" key.
//
// Decode and validation failures are reported with http.StatusBadRequest.
// Failures to initialize gptscript, load the program, or run it are reported
// with http.StatusInternalServerError.
func (s *server) createDataset(w http.ResponseWriter, r *http.Request) {
	logger := gcontext.GetLogger(r.Context())

	var req datasetRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to decode request body: %w", err))
		return
	}

	if err := req.validate(true); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Validate the tool arguments before initializing gptscript so that a
	// malformed request is rejected without doing any setup work.
	var args createDatasetArgs
	if err := json.Unmarshal([]byte(req.Input), &args); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to unmarshal input: %w", err))
		return
	}

	if err := args.validate(); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Resolve the options once so that a per-request datasetToolRepo override
	// applies to the tool that is loaded below, not only to the gptscript
	// instance. Previously the server-wide repo was always used here.
	opts := req.opts(s.gptscriptOpts)

	// NOTE(review): g is never released in this handler; confirm whether
	// gptscript.GPTScript requires an explicit Close.
	g, err := gptscript.New(r.Context(), opts)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err))
		return
	}

	prg, err := loader.Program(r.Context(), "Create Dataset from "+opts.DatasetToolRepo, "", loader.Options{
		Cache: g.Cache,
	})
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to load program: %w", err))
		return
	}

	result, err := g.Run(r.Context(), prg, s.gptscriptOpts.Env, req.Input)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to run program: %w", err))
		return
	}

	writeResponse(logger, w, map[string]any{"stdout": result})
}

// addDatasetElementArgs is the JSON object that addDatasetElement expects to
// find in datasetRequest.Input.
type addDatasetElementArgs struct {
	// DatasetID identifies the target dataset. Required.
	DatasetID string `json:"dataset_id"`
	// ElementName is the name of the element to add. Required.
	ElementName string `json:"element_name"`
	// ElementDescription is not checked by validate.
	ElementDescription string `json:"element_description"`
	// ElementContent is the content of the element. Required.
	ElementContent string `json:"element_content"`
}

// validate reports the first missing required field, checking dataset_id,
// then element_name, then element_content.
func (a addDatasetElementArgs) validate() error {
	switch {
	case a.DatasetID == "":
		return fmt.Errorf("dataset_id is required")
	case a.ElementName == "":
		return fmt.Errorf("element_name is required")
	case a.ElementContent == "":
		return fmt.Errorf("element_content is required")
	default:
		return nil
	}
}

// addDatasetElement handles a request to add an element to a dataset.
//
// The JSON request body is decoded into a datasetRequest; both workspace and
// input are required. Input must itself be a JSON object matching
// addDatasetElementArgs (dataset_id, element_name and element_content are
// required). The "Add Element" tool is loaded from the effective dataset tool
// repo and run with the raw input, and the result is passed to writeResponse
// under the "stdout" key.
//
// Decode and validation failures are reported with http.StatusBadRequest.
// Failures to initialize gptscript, load the program, or run it are reported
// with http.StatusInternalServerError.
func (s *server) addDatasetElement(w http.ResponseWriter, r *http.Request) {
	logger := gcontext.GetLogger(r.Context())

	var req datasetRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to decode request body: %w", err))
		return
	}

	if err := req.validate(true); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Validate the tool arguments before initializing gptscript so that a
	// malformed request is rejected without doing any setup work.
	var args addDatasetElementArgs
	if err := json.Unmarshal([]byte(req.Input), &args); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to unmarshal input: %w", err))
		return
	}

	if err := args.validate(); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Resolve the options once so that a per-request datasetToolRepo override
	// applies to the tool that is loaded below, not only to the gptscript
	// instance. Previously the server-wide repo was always used here.
	opts := req.opts(s.gptscriptOpts)

	// NOTE(review): g is never released in this handler; confirm whether
	// gptscript.GPTScript requires an explicit Close.
	g, err := gptscript.New(r.Context(), opts)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err))
		return
	}

	prg, err := loader.Program(r.Context(), "Add Element from "+opts.DatasetToolRepo, "", loader.Options{
		Cache: g.Cache,
	})
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to load program: %w", err))
		return
	}

	result, err := g.Run(r.Context(), prg, s.gptscriptOpts.Env, req.Input)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to run program: %w", err))
		return
	}

	writeResponse(logger, w, map[string]any{"stdout": result})
}

// listDatasetElementsArgs is the JSON object that listDatasetElements expects
// to find in datasetRequest.Input.
type listDatasetElementsArgs struct {
	// DatasetID identifies the dataset whose elements are listed. Required.
	DatasetID string `json:"dataset_id"`
}

// validate reports an error when the dataset ID is missing.
func (a listDatasetElementsArgs) validate() error {
	if a.DatasetID != "" {
		return nil
	}
	return fmt.Errorf("dataset_id is required")
}

// listDatasetElements handles a request to list the elements of a dataset.
//
// The JSON request body is decoded into a datasetRequest; both workspace and
// input are required. Input must itself be a JSON object matching
// listDatasetElementsArgs (dataset_id is required). The "List Elements" tool
// is loaded from the effective dataset tool repo and run with the raw input,
// and the result is passed to writeResponse under the "stdout" key.
//
// Decode and validation failures are reported with http.StatusBadRequest.
// Failures to initialize gptscript, load the program, or run it are reported
// with http.StatusInternalServerError.
func (s *server) listDatasetElements(w http.ResponseWriter, r *http.Request) {
	logger := gcontext.GetLogger(r.Context())

	var req datasetRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to decode request body: %w", err))
		return
	}

	if err := req.validate(true); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Validate the tool arguments before initializing gptscript so that a
	// malformed request is rejected without doing any setup work.
	var args listDatasetElementsArgs
	if err := json.Unmarshal([]byte(req.Input), &args); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to unmarshal input: %w", err))
		return
	}

	if err := args.validate(); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Resolve the options once so that a per-request datasetToolRepo override
	// applies to the tool that is loaded below, not only to the gptscript
	// instance. Previously the server-wide repo was always used here.
	opts := req.opts(s.gptscriptOpts)

	// NOTE(review): g is never released in this handler; confirm whether
	// gptscript.GPTScript requires an explicit Close.
	g, err := gptscript.New(r.Context(), opts)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err))
		return
	}

	prg, err := loader.Program(r.Context(), "List Elements from "+opts.DatasetToolRepo, "", loader.Options{
		Cache: g.Cache,
	})
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to load program: %w", err))
		return
	}

	result, err := g.Run(r.Context(), prg, s.gptscriptOpts.Env, req.Input)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to run program: %w", err))
		return
	}

	writeResponse(logger, w, map[string]any{"stdout": result})
}

// getDatasetElementArgs is the JSON object that getDatasetElement expects to
// find in datasetRequest.Input.
type getDatasetElementArgs struct {
	// DatasetID identifies the dataset to read from. Required.
	DatasetID string `json:"dataset_id"`
	// Element identifies the element to fetch. Required.
	Element string `json:"element"`
}

// validate reports the first missing required field, checking dataset_id
// before element.
func (a getDatasetElementArgs) validate() error {
	switch {
	case a.DatasetID == "":
		return fmt.Errorf("dataset_id is required")
	case a.Element == "":
		return fmt.Errorf("element is required")
	default:
		return nil
	}
}

// getDatasetElement handles a request to fetch a single element of a dataset.
//
// The JSON request body is decoded into a datasetRequest; both workspace and
// input are required. Input must itself be a JSON object matching
// getDatasetElementArgs (dataset_id and element are required). The
// "Get Element" tool is loaded from the effective dataset tool repo and run
// with the raw input, and the result is passed to writeResponse under the
// "stdout" key.
//
// Decode and validation failures are reported with http.StatusBadRequest.
// Failures to initialize gptscript, load the program, or run it are reported
// with http.StatusInternalServerError.
func (s *server) getDatasetElement(w http.ResponseWriter, r *http.Request) {
	logger := gcontext.GetLogger(r.Context())

	var req datasetRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to decode request body: %w", err))
		return
	}

	if err := req.validate(true); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Validate the tool arguments before initializing gptscript so that a
	// malformed request is rejected without doing any setup work.
	var args getDatasetElementArgs
	if err := json.Unmarshal([]byte(req.Input), &args); err != nil {
		writeError(logger, w, http.StatusBadRequest, fmt.Errorf("failed to unmarshal input: %w", err))
		return
	}

	if err := args.validate(); err != nil {
		writeError(logger, w, http.StatusBadRequest, err)
		return
	}

	// Resolve the options once so that a per-request datasetToolRepo override
	// applies to the tool that is loaded below, not only to the gptscript
	// instance. Previously the server-wide repo was always used here.
	opts := req.opts(s.gptscriptOpts)

	// NOTE(review): g is never released in this handler; confirm whether
	// gptscript.GPTScript requires an explicit Close.
	g, err := gptscript.New(r.Context(), opts)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to initialize gptscript: %w", err))
		return
	}

	prg, err := loader.Program(r.Context(), "Get Element from "+opts.DatasetToolRepo, "", loader.Options{
		Cache: g.Cache,
	})
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to load program: %w", err))
		return
	}

	result, err := g.Run(r.Context(), prg, s.gptscriptOpts.Env, req.Input)
	if err != nil {
		writeError(logger, w, http.StatusInternalServerError, fmt.Errorf("failed to run program: %w", err))
		return
	}

	writeResponse(logger, w, map[string]any{"stdout": result})
}
Loading