From b68cba0fe874d17a25d29785e81447ddcf21e500 Mon Sep 17 00:00:00 2001 From: Michael de Hoog Date: Thu, 23 Dec 2021 13:33:38 -0600 Subject: [PATCH 1/9] Use token type --- managed_repository.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/managed_repository.go b/managed_repository.go index 4ce3cae..c8657f8 100644 --- a/managed_repository.go +++ b/managed_repository.go @@ -200,7 +200,7 @@ func (r *managedRepository) fetchUpstream() (err error) { err = status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for the server: %v", err) return err } - err = runGit(op, r.localDiskPath, "-c", "http.extraHeader=Authorization: Bearer "+t.AccessToken, "fetch", "--progress", "-f", "-n", "origin", "refs/heads/*:refs/heads/*", "refs/changes/*:refs/changes/*") + err = runGit(op, r.localDiskPath, "-c", "http.extraHeader=Authorization: "+t.Type()+" "+t.AccessToken, "fetch", "--progress", "-f", "-n", "origin", "refs/heads/*:refs/heads/*", "refs/changes/*:refs/changes/*") } if err == nil { t, err = r.config.TokenSource.Token() @@ -208,7 +208,7 @@ func (r *managedRepository) fetchUpstream() (err error) { err = status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for the server: %v", err) return err } - err = runGit(op, r.localDiskPath, "-c", "http.extraHeader=Authorization: Bearer "+t.AccessToken, "fetch", "--progress", "-f", "origin") + err = runGit(op, r.localDiskPath, "-c", "http.extraHeader=Authorization: "+t.Type()+" "+t.AccessToken, "fetch", "--progress", "-f", "origin") } logStats("fetch", startTime, err) if err == nil { From a826bb21b15a36182d292cbed600f17e338fb7fa Mon Sep 17 00:00:00 2001 From: Michael de Hoog Date: Thu, 23 Dec 2021 19:23:49 -0600 Subject: [PATCH 2/9] Pass upstream URL to token generation --- goblet-server/main.go | 5 ++++- goblet.go | 2 +- managed_repository.go | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/goblet-server/main.go b/goblet-server/main.go index cad2a0a..cba15f3 100644 
--- a/goblet-server/main.go +++ b/goblet-server/main.go @@ -35,6 +35,7 @@ import ( "github.com/google/uuid" "go.opencensus.io/stats/view" "go.opencensus.io/tag" + "golang.org/x/oauth2" "golang.org/x/oauth2/google" logpb "google.golang.org/genproto/googleapis/logging/v2" @@ -230,7 +231,9 @@ func main() { LocalDiskCacheRoot: *cacheRoot, URLCanonializer: googlehook.CanonicalizeURL, RequestAuthorizer: authorizer, - TokenSource: ts, + TokenSource: func(upstreamURL *url.URL) (*oauth2.Token, error) { + return ts.Token() + }, ErrorReporter: er, RequestLogger: rl, LongRunningOperationLogger: lrol, diff --git a/goblet.go b/goblet.go index b6641b2..179fe5a 100644 --- a/goblet.go +++ b/goblet.go @@ -64,7 +64,7 @@ type ServerConfig struct { RequestAuthorizer func(*http.Request) error - TokenSource oauth2.TokenSource + TokenSource func(upstreamURL *url.URL) (*oauth2.Token, error) ErrorReporter func(*http.Request, error) diff --git a/managed_repository.go b/managed_repository.go index 4ce3cae..e0025b7 100644 --- a/managed_repository.go +++ b/managed_repository.go @@ -132,7 +132,7 @@ func (r *managedRepository) lsRefsUpstream(command []*gitprotocolio.ProtocolV2Re if err != nil { return nil, status.Errorf(codes.Internal, "cannot construct a request object: %v", err) } - t, err := r.config.TokenSource.Token() + t, err := r.config.TokenSource(r.upstreamURL) if err != nil { return nil, status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for the server: %v", err) } @@ -195,7 +195,7 @@ func (r *managedRepository) fetchUpstream() (err error) { defer r.mu.Unlock() if splitGitFetch { // Fetch heads and changes first. 
- t, err = r.config.TokenSource.Token() + t, err = r.config.TokenSource(r.upstreamURL) if err != nil { err = status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for the server: %v", err) return err @@ -203,7 +203,7 @@ func (r *managedRepository) fetchUpstream() (err error) { err = runGit(op, r.localDiskPath, "-c", "http.extraHeader=Authorization: Bearer "+t.AccessToken, "fetch", "--progress", "-f", "-n", "origin", "refs/heads/*:refs/heads/*", "refs/changes/*:refs/changes/*") } if err == nil { - t, err = r.config.TokenSource.Token() + t, err = r.config.TokenSource(r.upstreamURL) if err != nil { err = status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for the server: %v", err) return err From ce8ea69d7121a2fa2d03c148cca094ee2c06efda Mon Sep 17 00:00:00 2001 From: Jan Roehrich Date: Tue, 28 Nov 2023 20:15:54 +0100 Subject: [PATCH 3/9] Adapt .gitignore for Jetbrains IDE use --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0992301..4692519 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /goblet-server/goblet-server /bazel-* +/.idea From ebbf5d72f3541e7d76322eaa937215615c14f4e4 Mon Sep 17 00:00:00 2001 From: Jan Roehrich Date: Tue, 28 Nov 2023 20:16:28 +0100 Subject: [PATCH 4/9] Also compute Authorization headers for get fetch similar to ls-ref --- managed_repository.go | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/managed_repository.go b/managed_repository.go index 4ce3cae..bb803bc 100644 --- a/managed_repository.go +++ b/managed_repository.go @@ -35,7 +35,6 @@ import ( "github.com/google/gitprotocolio" "go.opencensus.io/stats" "go.opencensus.io/tag" - "golang.org/x/oauth2" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) @@ -136,10 +135,10 @@ func (r *managedRepository) lsRefsUpstream(command []*gitprotocolio.ProtocolV2Re if err != nil { return nil, status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for 
the server: %v", err) } + t.SetAuthHeader(req) req.Header.Add("Content-Type", "application/x-git-upload-pack-request") req.Header.Add("Accept", "application/x-git-upload-pack-result") req.Header.Add("Git-Protocol", "version=2") - t.SetAuthHeader(req) startTime := time.Now() resp, err := http.DefaultClient.Do(req) @@ -189,26 +188,22 @@ func (r *managedRepository) fetchUpstream() (err error) { splitGitFetch = true } - var t *oauth2.Token + req := http.Request{Header: make(http.Header, 1)} + t, err := r.config.TokenSource.Token() + if err != nil { + return status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for the server: %v", err) + } + t.SetAuthHeader(&req) + startTime := time.Now() r.mu.Lock() defer r.mu.Unlock() if splitGitFetch { // Fetch heads and changes first. - t, err = r.config.TokenSource.Token() - if err != nil { - err = status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for the server: %v", err) - return err - } - err = runGit(op, r.localDiskPath, "-c", "http.extraHeader=Authorization: Bearer "+t.AccessToken, "fetch", "--progress", "-f", "-n", "origin", "refs/heads/*:refs/heads/*", "refs/changes/*:refs/changes/*") + err = runGit(op, r.localDiskPath, "-c", fmt.Sprintf("http.extraHeader=%s: %s", "Authorization", req.Header.Get("Authorization")), "fetch", "--progress", "-f", "-n", "origin", "refs/heads/*:refs/heads/*", "refs/changes/*:refs/changes/*") } if err == nil { - t, err = r.config.TokenSource.Token() - if err != nil { - err = status.Errorf(codes.Internal, "cannot obtain an OAuth2 access token for the server: %v", err) - return err - } - err = runGit(op, r.localDiskPath, "-c", "http.extraHeader=Authorization: Bearer "+t.AccessToken, "fetch", "--progress", "-f", "origin") + err = runGit(op, r.localDiskPath, "-c", fmt.Sprintf("http.extraHeader=%s: %s", "Authorization", req.Header.Get("Authorization")), "fetch", "--progress", "-f", "origin") } logStats("fetch", startTime, err) if err == nil { From 
83ae3120e3e32cae21a59e86dd2441e4c6050ba9 Mon Sep 17 00:00:00 2001 From: Jan Roehrich Date: Wed, 29 Nov 2023 12:07:49 +0100 Subject: [PATCH 5/9] Remove Google specific implementations --- BUILD | 1 - go.mod | 11 +- goblet-server/BUILD | 27 ---- goblet-server/main.go | 317 ------------------------------------------ goblet_deps.bzl | 6 - google/BUILD | 21 --- google/backup.go | 303 ---------------------------------------- google/hooks.go | 182 ------------------------ reporting.go | 44 +++++- 9 files changed, 43 insertions(+), 869 deletions(-) delete mode 100644 goblet-server/BUILD delete mode 100644 goblet-server/main.go delete mode 100644 google/BUILD delete mode 100644 google/backup.go delete mode 100644 google/hooks.go diff --git a/BUILD b/BUILD index 3ded448..39cbd97 100644 --- a/BUILD +++ b/BUILD @@ -21,7 +21,6 @@ go_library( "@com_github_go_git_go_git_v5//:go_default_library", "@com_github_go_git_go_git_v5//plumbing:go_default_library", "@com_github_google_gitprotocolio//:go_default_library", - "@com_github_grpc_ecosystem_grpc_gateway//runtime:go_default_library", "@io_opencensus_go//stats:go_default_library", "@io_opencensus_go//tag:go_default_library", "@org_golang_google_grpc//codes:go_default_library", diff --git a/go.mod b/go.mod index 7cc7ff1..d81c490 100644 --- a/go.mod +++ b/go.mod @@ -1,20 +1,13 @@ module github.com/google/goblet -go 1.12 +go 1.16.5 require ( - cloud.google.com/go v0.86.0 - cloud.google.com/go/logging v1.4.2 - cloud.google.com/go/storage v1.16.0 - contrib.go.opencensus.io/exporter/stackdriver v0.13.1 github.com/Microsoft/go-winio v0.5.0 // indirect github.com/ProtonMail/go-crypto v0.0.0-20210705153151-cc34b1f6908b // indirect - github.com/aws/aws-sdk-go v1.30.7 // indirect github.com/go-git/go-git/v5 v5.4.2 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/gitprotocolio v0.0.0-20210704173409-b5a56823ae52 - github.com/google/uuid v1.1.2 - github.com/grpc-ecosystem/grpc-gateway v1.16.0 
github.com/kevinburke/ssh_config v1.1.0 // indirect github.com/sergi/go-diff v1.2.0 // indirect go.opencensus.io v0.23.0 @@ -22,7 +15,5 @@ require ( golang.org/x/net v0.0.0-20210614182718-04defd469f4e // indirect golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914 golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c // indirect - google.golang.org/api v0.50.0 - google.golang.org/genproto v0.0.0-20210708141623-e76da96a951f google.golang.org/grpc v1.39.0 ) diff --git a/goblet-server/BUILD b/goblet-server/BUILD deleted file mode 100644 index 7ae40f6..0000000 --- a/goblet-server/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") - -go_library( - name = "go_default_library", - srcs = ["main.go"], - importpath = "github.com/google/goblet/goblet-server", - visibility = ["//visibility:private"], - deps = [ - "//:go_default_library", - "//google:go_default_library", - "@com_github_google_uuid//:go_default_library", - "@com_google_cloud_go//errorreporting:go_default_library", - "@com_google_cloud_go_logging//:go_default_library", - "@com_google_cloud_go_storage//:go_default_library", - "@go_googleapis//google/logging/v2:logging_go_proto", - "@io_opencensus_go//stats/view:go_default_library", - "@io_opencensus_go//tag:go_default_library", - "@io_opencensus_go_contrib_exporter_stackdriver//:go_default_library", - "@org_golang_x_oauth2//google:go_default_library", - ], -) - -go_binary( - name = "goblet-server", - embed = [":go_default_library"], - visibility = ["//visibility:public"], -) diff --git a/goblet-server/main.go b/goblet-server/main.go deleted file mode 100644 index cad2a0a..0000000 --- a/goblet-server/main.go +++ /dev/null @@ -1,317 +0,0 @@ -// Copyright 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "context" - "flag" - "fmt" - "io" - "log" - "net/http" - "net/http/httputil" - "net/url" - "os" - "time" - - "cloud.google.com/go/errorreporting" - "cloud.google.com/go/logging" - "cloud.google.com/go/storage" - "contrib.go.opencensus.io/exporter/stackdriver" - "github.com/google/goblet" - googlehook "github.com/google/goblet/google" - "github.com/google/uuid" - "go.opencensus.io/stats/view" - "go.opencensus.io/tag" - "golang.org/x/oauth2/google" - - logpb "google.golang.org/genproto/googleapis/logging/v2" -) - -const ( - scopeCloudPlatform = "https://www.googleapis.com/auth/cloud-platform" - scopeUserInfoEmail = "https://www.googleapis.com/auth/userinfo.email" -) - -var ( - port = flag.Int("port", 8080, "port to listen to") - cacheRoot = flag.String("cache_root", "", "Root directory of cached repositories") - - stackdriverProject = flag.String("stackdriver_project", "", "GCP project ID used for the Stackdriver integration") - stackdriverLoggingLogID = flag.String("stackdriver_logging_log_id", "", "Stackdriver logging Log ID") - - backupBucketName = flag.String("backup_bucket_name", "", "Name of the GCS bucket for backed-up repositories") - backupManifestName = flag.String("backup_manifest_name", "", "Name of the backup manifest") - - latencyDistributionAggregation = view.Distribution( - 100, - 200, - 400, - 800, - 1000, // 1s - 2000, - 4000, - 8000, - 10000, // 10s - 20000, - 40000, - 80000, - 100000, // 100s - 200000, - 400000, - 800000, - 1000000, // 1000s - 2000000, - 4000000, - 8000000, - ) - views = 
[]*view.View{ - { - Name: "github.com/google/goblet/inbound-command-count", - Description: "Inbound command count", - TagKeys: []tag.Key{goblet.CommandTypeKey, goblet.CommandCanonicalStatusKey, goblet.CommandCacheStateKey}, - Measure: goblet.InboundCommandCount, - Aggregation: view.Count(), - }, - { - Name: "github.com/google/goblet/inbound-command-latency", - Description: "Inbound command latency", - TagKeys: []tag.Key{goblet.CommandTypeKey, goblet.CommandCanonicalStatusKey, goblet.CommandCacheStateKey}, - Measure: goblet.InboundCommandProcessingTime, - Aggregation: latencyDistributionAggregation, - }, - { - Name: "github.com/google/goblet/outbound-command-count", - Description: "Outbound command count", - TagKeys: []tag.Key{goblet.CommandTypeKey, goblet.CommandCanonicalStatusKey}, - Measure: goblet.OutboundCommandCount, - Aggregation: view.Count(), - }, - { - Name: "github.com/google/goblet/outbound-command-latency", - Description: "Outbound command latency", - TagKeys: []tag.Key{goblet.CommandTypeKey, goblet.CommandCanonicalStatusKey}, - Measure: goblet.OutboundCommandProcessingTime, - Aggregation: latencyDistributionAggregation, - }, - { - Name: "github.com/google/goblet/upstream-fetch-blocking-time", - Description: "Duration that requests are waiting for git-fetch from the upstream", - Measure: goblet.UpstreamFetchWaitingTime, - Aggregation: latencyDistributionAggregation, - }, - } -) - -func main() { - flag.Parse() - - ts, err := google.DefaultTokenSource(context.Background(), scopeCloudPlatform, scopeUserInfoEmail) - if err != nil { - log.Fatalf("Cannot initialize the OAuth2 token source: %v", err) - } - authorizer, err := googlehook.NewRequestAuthorizer(ts) - if err != nil { - log.Fatalf("Cannot create a request authorizer: %v", err) - } - if err := view.Register(views...); err != nil { - log.Fatal(err) - } - - var er func(*http.Request, error) - var rl func(r *http.Request, status int, requestSize, responseSize int64, latency time.Duration) = func(r 
*http.Request, status int, requestSize, responseSize int64, latency time.Duration) { - dump, err := httputil.DumpRequest(r, false) - if err != nil { - return - } - log.Printf("%q %d reqsize: %d, respsize %d, latency: %v", dump, status, requestSize, responseSize, latency) - } - var lrol func(string, *url.URL) goblet.RunningOperation = func(action string, u *url.URL) goblet.RunningOperation { - log.Printf("Starting %s for %s", action, u.String()) - return &logBasedOperation{action, u} - } - var backupLogger *log.Logger = log.New(os.Stderr, "", log.LstdFlags) - if *stackdriverProject != "" { - // Error reporter - ec, err := errorreporting.NewClient(context.Background(), *stackdriverProject, errorreporting.Config{ - ServiceName: "goblet", - }) - if err != nil { - log.Fatalf("Cannot create a Stackdriver errorreporting client: %v", err) - } - defer func() { - if err := ec.Close(); err != nil { - log.Printf("Failed to report errors to Stackdriver: %v", err) - } - }() - er = func(r *http.Request, err error) { - ec.Report(errorreporting.Entry{ - Req: r, - Error: err, - }) - log.Printf("Error while processing a request: %v", err) - } - - if *stackdriverLoggingLogID != "" { - lc, err := logging.NewClient(context.Background(), *stackdriverProject) - if err != nil { - log.Fatalf("Cannot create a Stackdriver logging client: %v", err) - } - defer func() { - if err := lc.Close(); err != nil { - log.Printf("Failed to log requests to Stackdriver: %v", err) - } - }() - - // Request logger - sdLogger := lc.Logger(*stackdriverLoggingLogID) - rl = func(r *http.Request, status int, requestSize, responseSize int64, latency time.Duration) { - sdLogger.Log(logging.Entry{ - HTTPRequest: &logging.HTTPRequest{ - Request: r, - RequestSize: requestSize, - Status: status, - ResponseSize: responseSize, - Latency: latency, - RemoteIP: r.RemoteAddr, - }, - }) - } - lrol = func(action string, u *url.URL) goblet.RunningOperation { - op := &stackdriverBasedOperation{ - sdLogger: sdLogger, - action: 
action, - u: u, - startTime: time.Now(), - id: uuid.New().String(), - } - op.sdLogger.Log(logging.Entry{ - Payload: &LongRunningOperation{ - Action: op.action, - URL: op.u.String(), - }, - Operation: &logpb.LogEntryOperation{ - Id: op.id, - Producer: "github.com/google/goblet", - First: true, - }, - }) - return op - } - // Backup logger - backupLogger = sdLogger.StandardLogger(logging.Warning) - } - - // OpenCensus view exporters. - exporter, err := stackdriver.NewExporter(stackdriver.Options{ - ProjectID: *stackdriverProject, - }) - if err != nil { - log.Fatal(err) - } - if err = exporter.StartMetricsExporter(); err != nil { - log.Fatal(err) - } - } - - config := &goblet.ServerConfig{ - LocalDiskCacheRoot: *cacheRoot, - URLCanonializer: googlehook.CanonicalizeURL, - RequestAuthorizer: authorizer, - TokenSource: ts, - ErrorReporter: er, - RequestLogger: rl, - LongRunningOperationLogger: lrol, - } - - if *backupBucketName != "" && *backupManifestName != "" { - gsClient, err := storage.NewClient(context.Background()) - if err != nil { - log.Fatal(err) - } - - googlehook.RunBackupProcess(config, gsClient.Bucket(*backupBucketName), *backupManifestName, backupLogger) - } - - http.HandleFunc("/healthz", func(w http.ResponseWriter, req *http.Request) { - w.Header().Set("Content-Type", "text/plain") - io.WriteString(w, "ok\n") - }) - http.Handle("/", goblet.HTTPHandler(config)) - log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", *port), nil)) -} - -type LongRunningOperation struct { - Action string `json:"action"` - URL string `json:"url"` - DurationMs int `json:"duration_msec,omitempty"` - Error string `json:"error,omitempty"` - ProgressMessage string `json:"progress_message,omitempty"` -} - -type logBasedOperation struct { - action string - u *url.URL -} - -func (op *logBasedOperation) Printf(format string, a ...interface{}) { - log.Printf("Progress %s (%s): %s", op.action, op.u.String(), fmt.Sprintf(format, a...)) -} - -func (op *logBasedOperation) Done(err error) { - 
log.Printf("Finished %s for %s: %v", op.action, op.u.String(), err) -} - -type stackdriverBasedOperation struct { - sdLogger *logging.Logger - action string - u *url.URL - startTime time.Time - id string -} - -func (op *stackdriverBasedOperation) Printf(format string, a ...interface{}) { - lro := &LongRunningOperation{ - Action: op.action, - URL: op.u.String(), - ProgressMessage: fmt.Sprintf(format, a...), - } - op.sdLogger.Log(logging.Entry{ - Payload: lro, - Operation: &logpb.LogEntryOperation{ - Id: op.id, - Producer: "github.com/google/goblet", - }, - }) -} - -func (op *stackdriverBasedOperation) Done(err error) { - lro := &LongRunningOperation{ - Action: op.action, - URL: op.u.String(), - DurationMs: int(time.Since(op.startTime) / time.Millisecond), - } - if err != nil { - lro.Error = err.Error() - } - op.sdLogger.Log(logging.Entry{ - Payload: lro, - Operation: &logpb.LogEntryOperation{ - Id: op.id, - Producer: "github.com/google/goblet", - Last: true, - }, - }) -} diff --git a/goblet_deps.bzl b/goblet_deps.bzl index a8a1b91..514e489 100644 --- a/goblet_deps.bzl +++ b/goblet_deps.bzl @@ -272,12 +272,6 @@ def goblet_deps(): sum = "h1:sjZBwGj9Jlw33ImPtvFviGYvseOtDM7hkSKB7+Tv3SM=", version = "v2.0.5", ) - go_repository( - name = "com_github_grpc_ecosystem_grpc_gateway", - importpath = "github.com/grpc-ecosystem/grpc-gateway", - sum = "h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=", - version = "v1.16.0", - ) go_repository( name = "com_github_hashicorp_golang_lru", importpath = "github.com/hashicorp/golang-lru", diff --git a/google/BUILD b/google/BUILD deleted file mode 100644 index 7b48872..0000000 --- a/google/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "go_default_library", - srcs = [ - "backup.go", - "hooks.go", - ], - importpath = "github.com/google/goblet/google", - visibility = ["//visibility:public"], - deps = [ - "//:go_default_library", - 
"@com_google_cloud_go_storage//:go_default_library", - "@org_golang_google_api//iterator:go_default_library", - "@org_golang_google_api//oauth2/v2:go_default_library", - "@org_golang_google_api//option:go_default_library", - "@org_golang_google_grpc//codes:go_default_library", - "@org_golang_google_grpc//status:go_default_library", - "@org_golang_x_oauth2//:go_default_library", - ], -) diff --git a/google/backup.go b/google/backup.go deleted file mode 100644 index c6658c8..0000000 --- a/google/backup.go +++ /dev/null @@ -1,303 +0,0 @@ -// Copyright 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package google - -import ( - "bufio" - "context" - "fmt" - "io" - "log" - "net/url" - "os" - "path" - "path/filepath" - "sort" - "strconv" - "strings" - "time" - - "cloud.google.com/go/storage" - "github.com/google/goblet" - "google.golang.org/api/iterator" -) - -const ( - gobletRepoManifestDir = "goblet-repository-manifests" - - manifestCleanUpDuration = 24 * time.Hour - - backupFrequency = time.Hour -) - -func RunBackupProcess(config *goblet.ServerConfig, bh *storage.BucketHandle, manifestName string, logger *log.Logger) { - rw := &backupReaderWriter{ - bucketHandle: bh, - manifestName: manifestName, - config: config, - logger: logger, - } - rw.recoverFromBackup() - go func() { - timer := time.NewTimer(backupFrequency) - for { - select { - case <-timer.C: - rw.saveBackup() - } - timer.Reset(backupFrequency) - } - }() -} - -type backupReaderWriter struct { - bucketHandle *storage.BucketHandle - manifestName string - config *goblet.ServerConfig - logger *log.Logger -} - -func (b *backupReaderWriter) recoverFromBackup() { - repos := b.readRepoList() - if repos == nil || len(repos) == 0 { - b.logger.Print("No repositories found from backup") - return - } - - for rawURL, _ := range repos { - u, err := url.Parse(rawURL) - if err != nil { - b.logger.Printf("Cannot parse %s as a URL. Skipping", rawURL) - continue - } - - bundlePath, err := b.downloadBackupBundle(path.Join(u.Host, u.Path)) - if err != nil { - b.logger.Printf("Cannot find the backup bundle for %s. Skipping: %v", rawURL, err) - continue - } - - m, err := goblet.OpenManagedRepository(b.config, u) - if err != nil { - b.logger.Printf("Cannot open a managed repository for %s. 
Skipping: %v", rawURL, err) - continue - } - - m.RecoverFromBundle(bundlePath) - os.Remove(bundlePath) - } -} - -func (b *backupReaderWriter) readRepoList() map[string]bool { - it := b.bucketHandle.Objects(context.Background(), &storage.Query{ - Delimiter: "/", - Prefix: path.Join(gobletRepoManifestDir, b.manifestName) + "/", - }) - repos := map[string]bool{} - for { - attrs, err := it.Next() - if err == iterator.Done { - break - } - if err != nil { - b.logger.Printf("Error while finding the manifests: %v", err) - return nil - } - if attrs.Name == "" { - continue - } - - b.readManifest(attrs.Name, repos) - } - return repos -} - -func (b *backupReaderWriter) readManifest(name string, m map[string]bool) { - rc, err := b.bucketHandle.Object(name).NewReader(context.Background()) - if err != nil { - b.logger.Printf("Cannot open a manifest file %s. Skipping: %v", name, err) - return - } - defer rc.Close() - - sc := bufio.NewScanner(rc) - for sc.Scan() { - m[strings.TrimSpace(sc.Text())] = true - } - if err := sc.Err(); err != nil { - b.logger.Printf("Error while reading a manifest file %s. 
Skipping the rest of the file: %v", name, err) - } -} - -func (b *backupReaderWriter) downloadBackupBundle(name string) (string, error) { - _, name, err := b.gcBundle(name) - if name == "" { - return "", fmt.Errorf("cannot find the bundle for %s: %v", name, err) - } - - rc, err := b.bucketHandle.Object(name).NewReader(context.Background()) - if err != nil { - return "", err - } - defer rc.Close() - - tmpBundlePath := filepath.Join(b.config.LocalDiskCacheRoot, "tmp-bundle") - fi, err := os.OpenFile(tmpBundlePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) - if err != nil { - return "", err - } - defer fi.Close() - - if _, err := io.Copy(fi, rc); err != nil { - return "", err - } - return tmpBundlePath, nil -} - -func (b *backupReaderWriter) saveBackup() { - urls := []string{} - goblet.ListManagedRepositories(func(m goblet.ManagedRepository) { - u := m.UpstreamURL() - latestBundleSecPrecision, _, err := b.gcBundle(path.Join(u.Host, u.Path)) - if err != nil { - b.logger.Printf("cannot GC bundles for %s. Skipping: %v", u.String(), err) - return - } - // The bundle timestmap is seconds precision. - if latestBundleSecPrecision.Unix() >= m.LastUpdateTime().Unix() { - b.logger.Printf("existing bundle for %s is up-to-date %s", u.String(), latestBundleSecPrecision.Format(time.RFC3339)) - } else if err := b.backupManagedRepo(m); err != nil { - b.logger.Printf("cannot make a backup for %s. 
Skipping: %v", u.String(), err) - return - } - - urls = append(urls, u.String()) - }) - - now := time.Now() - manifestFile := path.Join(gobletRepoManifestDir, b.manifestName, fmt.Sprintf("%012d", now.Unix())) - if err := b.writeManifestFile(manifestFile, urls); err != nil { - b.logger.Printf("cannot create %s: %v", manifestFile, err) - return - } - - b.garbageCollectOldManifests(now) -} - -func (b *backupReaderWriter) gcBundle(name string) (time.Time, string, error) { - names := []string{} - it := b.bucketHandle.Objects(context.Background(), &storage.Query{ - Delimiter: "/", - Prefix: name + "/", - }) - for { - attrs, err := it.Next() - if err == iterator.Done { - break - } - if err != nil { - return time.Time{}, "", fmt.Errorf("error while finding the bundles to GC: %v", err) - } - if attrs.Name == "" { - continue - } - - names = append(names, attrs.Name) - } - - bundles := []string{} - for _, name := range names { - // Ignore non-bundles. - if _, err := strconv.ParseInt(path.Base(names[0]), 10, 64); err != nil { - continue - } - bundles = append(bundles, name) - } - - if len(bundles) == 0 { - // No backup found. - return time.Time{}, "", nil - } - sort.Sort(sort.Reverse(sort.StringSlice(bundles))) - - for _, name := range bundles[1:len(bundles)] { - b.bucketHandle.Object(name).Delete(context.Background()) - } - n, _ := strconv.ParseInt(path.Base(bundles[0]), 10, 64) - return time.Unix(n, 0), bundles[0], nil -} - -func (b *backupReaderWriter) backupManagedRepo(m goblet.ManagedRepository) error { - u := m.UpstreamURL() - bundleFile := path.Join(u.Host, u.Path, fmt.Sprintf("%012d", m.LastUpdateTime().Unix())) - - ctx, cf := context.WithCancel(context.Background()) - defer cf() - - wc := b.bucketHandle.Object(bundleFile).NewWriter(ctx) - if err := m.WriteBundle(wc); err != nil { - return err - } - // Closing here will commit the file. Otherwise, the cancelled context - // will discard the file. 
- wc.Close() - return nil -} - -func (b *backupReaderWriter) writeManifestFile(manifestFile string, urls []string) error { - ctx, cf := context.WithCancel(context.Background()) - defer cf() - - wc := b.bucketHandle.Object(manifestFile).NewWriter(ctx) - for _, url := range urls { - if _, err := io.WriteString(wc, url+"\n"); err != nil { - return err - } - } - // Closing here will commit the file. Otherwise, the cancelled context - // will discard the file. - wc.Close() - return nil -} - -func (b *backupReaderWriter) garbageCollectOldManifests(now time.Time) { - threshold := now.Add(-manifestCleanUpDuration) - it := b.bucketHandle.Objects(context.Background(), &storage.Query{ - Delimiter: "/", - Prefix: path.Join(gobletRepoManifestDir, b.manifestName) + "/", - }) - for { - attrs, err := it.Next() - if err == iterator.Done { - break - } - if err != nil { - b.logger.Printf("Error while finding the manifests to GC: %v", err) - return - } - if attrs.Prefix != "" { - continue - } - - sec, err := strconv.ParseInt(path.Base(attrs.Name), 10, 64) - if err != nil { - continue - } - t := time.Unix(sec, 0) - if t.Before(threshold) { - b.bucketHandle.Object(attrs.Name).Delete(context.Background()) - } - } -} diff --git a/google/hooks.go b/google/hooks.go deleted file mode 100644 index 8062767..0000000 --- a/google/hooks.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package google - -import ( - "context" - "encoding/base64" - "fmt" - "net/http" - "net/url" - "strings" - - "golang.org/x/oauth2" - oauth2cli "google.golang.org/api/oauth2/v2" - "google.golang.org/api/option" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" -) - -const ( - scopeCloudPlatform = "https://www.googleapis.com/auth/cloud-platform" - scopeUserInfoEmail = "https://www.googleapis.com/auth/userinfo.email" -) - -// NewRequestAuthorizer returns a function that checks the authorization header -// and authorize the request. -func NewRequestAuthorizer(ts oauth2.TokenSource) (func(*http.Request) error, error) { - // Restrict the access to the proxy to the same user as the server's - // service account. This makes sure that the server won't expose the - // contents that the proxy clients cannot access, and the access - // auditing is done properly. - - oauth2Service, err := oauth2cli.NewService(context.Background(), option.WithTokenSource(ts)) - if err != nil { - return nil, fmt.Errorf("cannot initialize the OAuth2 service: %v", err) - } - - // Get the server's service account. - t, err := ts.Token() - if err != nil { - return nil, fmt.Errorf("cannot obtain an OAuth2 access token for the server: %v", err) - } - c := oauth2Service.Tokeninfo() - c.AccessToken(t.AccessToken) - ti, err := c.Do() - if err != nil { - return nil, fmt.Errorf("failed to call OAuth2 TokenInfo: %v", err) - } - - // Check that the server setup is correct. - hasCloudPlatform, hasUserInfoEmail := scopeCheck(ti.Scope) - if !hasCloudPlatform { - return nil, fmt.Errorf("the server credential doesn't have %s scope. This is needed to access upstream repositories.", scopeCloudPlatform) - } - if !hasUserInfoEmail { - return nil, fmt.Errorf("the server credential doesn't have %s scope. 
This is needed to get the email address of the service account.", scopeUserInfoEmail) - } - if ti.Email == "" { - return nil, fmt.Errorf("cannot obtain the server's service account email") - } - - email := ti.Email - return func(r *http.Request) error { - if h := r.Header.Get("Authorization"); h != "" { - return authorizeAuthzHeader(oauth2Service, email, h) - } - if c, err := r.Cookie("o"); err == nil { - return authorizeCookie(oauth2Service, email, c.Value) - } - return status.Error(codes.Unauthenticated, "no auth token") - }, nil -} - -func authorizeAuthzHeader(oauth2Service *oauth2cli.Service, email, authorizationHeader string) error { - accessToken := "" - if strings.HasPrefix(authorizationHeader, "Bearer ") { - accessToken = strings.TrimPrefix(authorizationHeader, "Bearer ") - } else if strings.HasPrefix(authorizationHeader, "Basic ") { - bs, err := base64.StdEncoding.DecodeString(strings.TrimPrefix(authorizationHeader, "Basic ")) - if err != nil { - return status.Error(codes.Unauthenticated, "cannot parse the Authorization header") - } - s := string(bs) - i := strings.IndexByte(s, ':') - if i < 0 { - return status.Error(codes.Unauthenticated, "cannot parse the Authorization header") - } - accessToken = s[i+1:] - } else { - return status.Error(codes.Unauthenticated, "no bearer token") - } - return authorizeAccessToken(oauth2Service, email, accessToken) -} - -func authorizeCookie(oauth2Service *oauth2cli.Service, email, oCookie string) error { - if strings.ContainsRune(oCookie, '=') { - oCookie = strings.SplitN(oCookie, "=", 2)[1] - } - return authorizeAccessToken(oauth2Service, email, oCookie) -} - -func authorizeAccessToken(oauth2Service *oauth2cli.Service, email, accessToken string) error { - c := oauth2Service.Tokeninfo() - c.AccessToken(accessToken) - ti, err := c.Do() - if err != nil { - return status.Errorf(codes.Unavailable, "cannot call OAuth2 TokenInfo: %v", err) - } - - hasCloudPlatform, hasUserInfoEmail := scopeCheck(ti.Scope) - if !hasCloudPlatform 
{ - return status.Errorf(codes.Unauthenticated, "access token doesn't have %s", scopeCloudPlatform) - } - if !hasUserInfoEmail { - return status.Errorf(codes.Unauthenticated, "access token doesn't have %s", scopeUserInfoEmail) - } - - if ti.Email != email { - // Do not send the server's service account email so that a - // stranger cannot know the server's service account. The proxy - // server should be running in a private network, but this is - // an extra protection. - return status.Errorf(codes.Unauthenticated, "access token attests a different user %s", ti.Email) - } - - return nil -} - -// CanonicalizeURL returns a canonicalized URL for googlesource.com and source.developers.google.com. -func CanonicalizeURL(u *url.URL) (*url.URL, error) { - ret := url.URL{} - ret.Scheme = "https" - ret.Host = u.Host - ret.Path = u.Path - - if strings.HasSuffix(ret.Host, ".googlesource.com") { - if strings.HasPrefix(ret.Path, "/a/") { - // Force authorization prefix. - ret.Path = strings.TrimPrefix(ret.Path, "/a") - } - } else if ret.Host == "source.developers.google.com" { - // Do nothing. - } else { - return nil, status.Errorf(codes.InvalidArgument, "unsupported host: %s", u.Host) - } - // Git endpoint suffixes. 
- if strings.HasSuffix(ret.Path, "/info/refs") { - ret.Path = strings.TrimSuffix(ret.Path, "/info/refs") - } else if strings.HasSuffix(ret.Path, "/git-upload-pack") { - ret.Path = strings.TrimSuffix(ret.Path, "/git-upload-pack") - } else if strings.HasSuffix(ret.Path, "/git-receive-pack") { - ret.Path = strings.TrimSuffix(ret.Path, "/git-receive-pack") - } - ret.Path = strings.TrimSuffix(ret.Path, ".git") - return &ret, nil -} - -func scopeCheck(scopes string) (bool, bool) { - hasCloudPlatform := false - hasUserInfoEmail := false - for _, scope := range strings.Split(scopes, " ") { - if scope == scopeCloudPlatform { - hasCloudPlatform = true - } - if scope == scopeUserInfoEmail { - hasUserInfoEmail = true - } - } - return hasCloudPlatform, hasUserInfoEmail -} diff --git a/reporting.go b/reporting.go index 274f073..97f7e74 100644 --- a/reporting.go +++ b/reporting.go @@ -21,7 +21,6 @@ import ( "net/http" "time" - "github.com/grpc-ecosystem/grpc-gateway/runtime" "go.opencensus.io/stats" "go.opencensus.io/tag" "google.golang.org/grpc/codes" @@ -61,7 +60,7 @@ func (h *httpErrorReporter) reportError(err error) { h.w.Header().Add("WWW-Authenticate", "Bearer") h.w.Header().Add("WWW-Authenticate", "Basic realm=goblet") } - httpStatus := runtime.HTTPStatusFromCode(code) + httpStatus := httpStatusFromCode(code) if message == "" { message = http.StatusText(httpStatus) } @@ -177,3 +176,44 @@ func (w *monitoringWriter) WriteHeader(status int) { func (w *monitoringWriter) Header() http.Header { return w.w.Header() } + +func httpStatusFromCode(code codes.Code) int { + switch code { + case codes.OK: + return http.StatusOK + case codes.Canceled: + return http.StatusRequestTimeout + case codes.Unknown: + return http.StatusInternalServerError + case codes.InvalidArgument: + return http.StatusBadRequest + case codes.DeadlineExceeded: + return http.StatusGatewayTimeout + case codes.NotFound: + return http.StatusNotFound + case codes.AlreadyExists: + return http.StatusConflict + case 
codes.PermissionDenied: + return http.StatusForbidden + case codes.Unauthenticated: + return http.StatusUnauthorized + case codes.ResourceExhausted: + return http.StatusTooManyRequests + case codes.FailedPrecondition: + // Note, this deliberately doesn't translate to the similarly named '412 Precondition Failed' HTTP response status. + return http.StatusBadRequest + case codes.Aborted: + return http.StatusConflict + case codes.OutOfRange: + return http.StatusBadRequest + case codes.Unimplemented: + return http.StatusNotImplemented + case codes.Internal: + return http.StatusInternalServerError + case codes.Unavailable: + return http.StatusServiceUnavailable + case codes.DataLoss: + return http.StatusInternalServerError + } + return http.StatusInternalServerError +} From 2b7e66b12b7d9da4f82975ec32dcd209c80ce00e Mon Sep 17 00:00:00 2001 From: Jacob Repp Date: Fri, 7 Nov 2025 02:19:08 -0800 Subject: [PATCH 6/9] docs: add RFC-002 on GitHub OAuth and multi-tenancy architecture Comprehensive analysis of GitHub Enterprise and public GitHub OAuth support with respect to multi-tenancy isolation concerns. 
Covers: - Current state analysis of authentication flows - GitHub authentication models (Apps, PATs, OAuth Apps) - Multi-tenancy isolation requirements and threat model - Technical architecture for secure multi-tenant operation - Implementation strategy (5 phases) - Tradeoffs and recommendations - Migration path from current to full implementation Key findings: - PR #7 provides critical foundation (URL-aware tokens, dynamic type) - Complete solution requires: authorization layer + token manager + cache partitioning - GitHub Apps recommended for production multi-tenant (automatic rotation, org-scoped) - Estimated 12-16 weeks for full implementation Related: PR #7, RFC-001 --- .../rfc-002-github-oauth-multi-tenancy.md | 1204 +++++++++++++++++ 1 file changed, 1204 insertions(+) create mode 100644 docs/architecture/rfc-002-github-oauth-multi-tenancy.md diff --git a/docs/architecture/rfc-002-github-oauth-multi-tenancy.md b/docs/architecture/rfc-002-github-oauth-multi-tenancy.md new file mode 100644 index 0000000..3b27ccd --- /dev/null +++ b/docs/architecture/rfc-002-github-oauth-multi-tenancy.md @@ -0,0 +1,1204 @@ +# RFC-002: GitHub OAuth and Multi-Tenancy Authentication Architecture + +**Status:** Draft +**Author:** System Architecture Team +**Created:** 2025-11-07 +**Related:** PR #7, RFC-001 (Secure Multi-Tenant Cache) + +## Executive Summary + +This RFC provides a comprehensive analysis of GitHub Enterprise and public GitHub OAuth support in the context of multi-tenant Git caching proxy deployments. It examines the authentication models, authorization flows, token management strategies, and isolation requirements necessary for secure multi-tenant operations. + +**Key Finding:** PR #7's changes (upstream URL-aware token generation + dynamic token type support) are **critical enablers** for secure multi-tenant GitHub caching but are **not sufficient** on their own. 
Complete multi-tenant isolation requires integration with request-level authorization and cache partitioning. + +## Table of Contents + +1. [Background](#background) +2. [Current State Analysis](#current-state-analysis) +3. [GitHub Authentication Models](#github-authentication-models) +4. [Multi-Tenancy Isolation Requirements](#multi-tenancy-isolation-requirements) +5. [Technical Architecture](#technical-architecture) +6. [Implementation Strategy](#implementation-strategy) +7. [Tradeoffs and Recommendations](#tradeoffs-and-recommendations) +8. [Security Considerations](#security-considerations) +9. [Migration Path](#migration-path) + +--- + +## Background + +### Problem Statement + +Goblet currently implements a Git caching proxy with two distinct authentication layers: + +1. **Client → Goblet Authentication** (RequestAuthorizer) + - Who can access the cache? + - Uses OIDC/Bearer tokens + - Identity: email, groups, subject + +2. **Goblet → Upstream Authentication** (TokenSource) + - How does Goblet authenticate to upstream Git servers? 
+ - Uses OAuth2 tokens + - **Current limitation:** Single token for all upstreams + +### The Challenge + +In multi-tenant scenarios with private repositories: + +``` +Tenant A (Org: acme-corp) Tenant B (Org: megacorp) + ↓ (OIDC: alice@acme.com) ↓ (OIDC: bob@mega.com) + ↓ ↓ + Goblet Cache + ↓ (Need: acme-corp token) ↓ (Need: megacorp token) + ↓ ↓ +github.com/acme-corp/repo github.com/megacorp/repo +``` + +**Problems without PR #7:** +- ❌ Single `TokenSource` for all organizations +- ❌ Cannot generate org-specific tokens +- ❌ Hardcoded "Bearer" breaks GitHub Enterprise + +**Problems even with PR #7:** +- ⚠️ No automatic tenant → upstream mapping +- ⚠️ No cache isolation enforcement +- ⚠️ No token scope validation + +--- + +## Current State Analysis + +### Existing Architecture + +```mermaid +sequenceDiagram + participant Client + participant Goblet + participant GitHub + + Client->>Goblet: HTTP Request + Authorization: Bearer + Note over Goblet: RequestAuthorizer validates OIDC token + Note over Goblet: Extracts: email, groups, sub + Goblet->>GitHub: git-upload-pack + Authorization: + Note over Goblet: TokenSource(upstreamURL) generates token + GitHub-->>Goblet: Pack data + Goblet-->>Client: Cached response +``` + +### Current Authentication Flows + +#### 1. 
Client Authentication (Inbound) + +**Location:** `auth/oidc/authorizer.go` + +```go +type Claims struct { + Email string `json:"email"` + EmailVerified bool `json:"email_verified"` + Name string `json:"name"` + Groups []string `json:"groups"` + Subject string `json:"sub"` +} + +func (a *Authorizer) AuthorizeRequest(r *http.Request) error { + token := ExtractBearerToken(r) + idToken, err := a.verifier.VerifyIDToken(r.Context(), token) + claims, err := GetClaims(idToken) + // Store claims in context + ctx := context.WithValue(r.Context(), claimsKey, claims) + *r = *r.WithContext(ctx) + return nil +} +``` + +**Capabilities:** +- ✅ Verifies user identity via OIDC +- ✅ Extracts user metadata (email, groups) +- ✅ Stores claims in request context +- ❌ Does NOT enforce repository-level authorization +- ❌ Does NOT map user to upstream credentials + +#### 2. Upstream Authentication (Outbound) + +**Location:** `goblet.go`, `managed_repository.go` + +**Before PR #7:** +```go +type ServerConfig struct { + TokenSource oauth2.TokenSource // Single token for ALL upstreams +} + +// In managed_repository.go +t, err := r.config.TokenSource.Token() +req.Header.Add("Authorization", "Bearer "+t.AccessToken) +``` + +**After PR #7:** +```go +type ServerConfig struct { + TokenSource func(upstreamURL *url.URL) (*oauth2.Token, error) +} + +// In managed_repository.go +t, err := r.config.TokenSource(r.upstreamURL) +if t.AccessToken != "" { + req.Header.Add("Authorization", t.Type()+" "+t.AccessToken) +} +``` + +**Improvements from PR #7:** +- ✅ Upstream URL passed to token generator +- ✅ Dynamic token type (Bearer vs Basic) +- ✅ Enables org-specific token generation +- ✅ GitHub Enterprise PAT support + +--- + +## GitHub Authentication Models + +### 1. 
GitHub.com (Public GitHub) + +#### Personal Access Tokens (PATs) + +**Classic PATs:** +``` +Format: ghp_xxxxxxxxxxxxxxxxxxxx +Type: Bearer (or Basic for GHE) +Scope: User-level permissions +Max: 1 year expiration +``` + +**Fine-Grained PATs:** +``` +Format: github_pat_xxxxxxxxxxxxxxxxxxxx +Type: Bearer +Scope: Repository-specific permissions +Max: 1 year expiration +Per-organization access control +``` + +**Pros:** +- Simple to generate +- No app registration needed +- Per-repo fine-grained permissions + +**Cons:** +- User-scoped (not organization) +- Manual rotation required +- Cannot distinguish between tenants +- Requires secure storage + +**Multi-Tenant Viability:** ⚠️ Limited +- Cannot map user identity to PAT +- Each tenant needs separate PAT +- No automatic rotation + +#### GitHub Apps + +**Installation Tokens:** +``` +Format: ghs_xxxxxxxxxxxxxxxxxxxx +Type: Bearer +Scope: Per-installation (org-level) +Expiration: 1 hour (automatic) +``` + +**Architecture:** +``` +GitHub App (app_id: 123456) + ├── Installation 1 (org: acme-corp, id: 111) + ├── Installation 2 (org: megacorp, id: 222) + └── Installation 3 (org: startup-co, id: 333) + +Each installation → independent token +``` + +**Token Generation:** +```go +func getInstallationToken(orgName string) (*oauth2.Token, error) { + // 1. Create JWT signed with app private key + jwt := createJWT(appID, privateKey) + + // 2. Look up installation ID for org + installationID := getInstallationID(orgName) + + // 3. 
Request installation token + token := exchangeJWTForToken(jwt, installationID) + + return &oauth2.Token{ + AccessToken: token, + TokenType: "Bearer", + Expiry: time.Now().Add(1 * time.Hour), + }, nil +} +``` + +**Pros:** +- ✅ Automatic token rotation (1 hour) +- ✅ Organization-scoped +- ✅ Audit log per installation +- ✅ Fine-grained repository permissions +- ✅ No user credentials needed + +**Cons:** +- Requires app registration and approval +- Complex setup (JWT signing) +- Rate limits per installation + +**Multi-Tenant Viability:** ✅ Excellent +- Perfect isolation (org → installation → token) +- Automatic expiration +- Audit trail + +#### OAuth Apps + +**OAuth Access Tokens:** +``` +Format: gho_xxxxxxxxxxxxxxxxxxxx +Type: Bearer +Scope: User permissions on behalf of user +Expiration: No automatic expiry +``` + +**Pros:** +- User-level authorization +- Can act on behalf of user + +**Cons:** +- ❌ User must be online to authorize +- ❌ No organization-level control +- ❌ Manual token management + +**Multi-Tenant Viability:** ❌ Poor +- Requires user interaction +- Not suitable for server-to-server + +### 2. 
GitHub Enterprise Server (GHE) + +#### Key Differences + +**Authentication:** +``` +Public GitHub: Authorization: Bearer ghp_xxxx +GHE: Authorization: Basic <base64(username:token)> + OR + Authorization: token ghp_xxxx +``` + +**Token Type Handling:** +```go +// PR #7 enables this: +token := &oauth2.Token{ + AccessToken: "ghp_enterprise_token", + TokenType: "Basic", // GHE expects Basic +} + +// managed_repository.go now uses: +header := "Authorization: " + t.Type() + " " + t.AccessToken +// Result: "Authorization: Basic ghp_enterprise_token" (AccessToken is sent verbatim, so for Basic it must already be the base64-encoded "username:token" pair) +``` + +**SAML/LDAP Integration:** +- GHE often uses SAML SSO +- Token generation may require SAML assertion +- Additional complexity for token mapping + +--- + +## Multi-Tenancy Isolation Requirements + +### Security Requirements Matrix + +| Requirement | Current State | With PR #7 | Full Implementation | +|-------------|---------------|------------|---------------------| +| **Client Authentication** | ✅ OIDC | ✅ OIDC | ✅ OIDC | +| **Client Authorization** | ❌ No repo-level checks | ❌ No repo-level checks | ✅ Per-repo ACL | +| **Upstream Token Selection** | ❌ Single token | ✅ URL-based | ✅ Tenant-aware | +| **Cache Isolation** | ❌ Shared cache | ❌ Shared cache | ✅ Partitioned | +| **Token Type Support** | ❌ Bearer only | ✅ Dynamic | ✅ Dynamic | +| **Audit Logging** | ⚠️ Partial | ⚠️ Partial | ✅ Complete | + +### Threat Model + +#### T1: Cross-Tenant Cache Access + +**Scenario:** +``` +1. Alice (tenant-a) requests: github.com/acme/secret-repo +2. Goblet caches to: /cache/github.com/acme/secret-repo +3. Bob (tenant-b) requests: github.com/acme/secret-repo +4. Goblet serves cached data to Bob (❌ UNAUTHORIZED) +``` + +**Current Mitigation:** None +**With PR #7:** None (PR #7 doesn't address cache isolation) +**Required:** Cache partitioning by tenant ID + +#### T2: Token Misuse + +**Scenario:** +``` +1. Goblet has token for org-a +2. User from org-b requests: github.com/org-a/repo +3. Goblet uses org-a token to fetch repo +4. 
User gains access to org-a data via org-b credentials +``` + +**Current Mitigation:** None +**With PR #7:** Enables org-specific tokens but doesn't enforce mapping +**Required:** Authorization layer to validate user→org→token + +#### T3: Token Leakage + +**Scenario:** +``` +1. Installation token is issued (valid for 1 hour) +2. Token is retained in memory or written to logs +3. Attacker extracts token from logs +4. Up to 1-hour window for misuse before the token expires +``` + +**Current Mitigation:** None +**With PR #7:** None (token storage unchanged) +**Required:** Secure token storage, automatic rotation, audit logging + +--- + +## Technical Architecture + +### Proposed Multi-Tenant Authentication Flow + +```mermaid +sequenceDiagram + participant Client + participant Goblet + participant AuthZ as Authorization Layer + participant TokenMgr as Token Manager + participant GitHub + + Client->>Goblet: git fetch + Auth: Bearer + Goblet->>AuthZ: ValidateRequest(claims, repoURL) + Note over AuthZ: Extract tenant ID from claims + Note over AuthZ: Check if tenant can access repo + AuthZ-->>Goblet: OK (tenant_id, org_name) + + Goblet->>TokenMgr: GetToken(upstreamURL, tenant_id) + Note over TokenMgr: Map tenant → GitHub org → installation + Note over TokenMgr: Generate/refresh token for org + TokenMgr-->>Goblet: Token (type, value, expiry) + + Goblet->>GitHub: git-upload-pack + Auth: + GitHub-->>Goblet: Pack data + Note over Goblet: Cache in tenant-specific path + Note over Goblet: /cache// + Goblet-->>Client: Response +``` + +### Component Design + +#### 1. 
Authorization Layer + +**Purpose:** Enforce repository-level access control + +```go +type Authorizer interface { + // ValidateAccess checks if a user can access a repository + ValidateAccess(ctx context.Context, claims *oidc.Claims, repoURL *url.URL) (*AuthzDecision, error) +} + +type AuthzDecision struct { + Allowed bool + TenantID string // tenant-a, tenant-b + OrgName string // acme-corp, megacorp + RepoAccess []string // ["read", "clone"] + DenialReason string +} + +type PolicyEngine struct { + // Map user claims → tenant → allowed repos + policies map[string]*TenantPolicy +} + +type TenantPolicy struct { + TenantID string + GitHubOrgs []string + AllowedRepos []string // Patterns: "github.com/acme/*" + DeniedRepos []string // Patterns: "github.com/acme/secret-*" +} + +func (pe *PolicyEngine) ValidateAccess( + ctx context.Context, + claims *oidc.Claims, + repoURL *url.URL, +) (*AuthzDecision, error) { + // 1. Extract tenant ID from claims + tenantID := extractTenantFromEmail(claims.Email) + // alice@acme-corp.com → tenant_id: acme-corp + + // 2. Load tenant policy + policy := pe.policies[tenantID] + if policy == nil { + return &AuthzDecision{ + Allowed: false, + DenialReason: "no policy for tenant", + }, nil + } + + // 3. Check if repo matches allowed patterns + allowed := matchesPatterns(repoURL.String(), policy.AllowedRepos) + denied := matchesPatterns(repoURL.String(), policy.DeniedRepos) + + if denied { + return &AuthzDecision{ + Allowed: false, + DenialReason: "repo explicitly denied", + }, nil + } + + if !allowed { + return &AuthzDecision{ + Allowed: false, + DenialReason: "repo not in allow list", + }, nil + } + + // 4. Extract GitHub org from URL + orgName := extractGitHubOrg(repoURL) + // github.com/acme-corp/repo → org: acme-corp + + return &AuthzDecision{ + Allowed: true, + TenantID: tenantID, + OrgName: orgName, + RepoAccess: []string{"read", "clone"}, + }, nil +} +``` + +#### 2. 
Token Manager + +**Purpose:** Generate org-specific tokens with proper lifecycle + +```go +type TokenManager interface { + GetToken(upstreamURL *url.URL, tenantID string) (*oauth2.Token, error) +} + +type GitHubAppTokenManager struct { + appID int64 + privateKey *rsa.PrivateKey + installations map[string]int64 // org_name → installation_id + tokenCache map[string]*cachedToken + mu sync.RWMutex +} + +type cachedToken struct { + token *oauth2.Token + expiry time.Time + orgName string + tenantID string +} + +func (tm *GitHubAppTokenManager) GetToken( + upstreamURL *url.URL, + tenantID string, +) (*oauth2.Token, error) { + // 1. Extract org from URL + orgName := extractGitHubOrg(upstreamURL) + // github.com/acme-corp/repo → acme-corp + + // 2. Check cache + cacheKey := fmt.Sprintf("%s:%s", tenantID, orgName) + tm.mu.RLock() + cached := tm.tokenCache[cacheKey] + tm.mu.RUnlock() + + if cached != nil && time.Now().Before(cached.expiry.Add(-5*time.Minute)) { + // Return cached token (with 5min safety margin) + return cached.token, nil + } + + // 3. Look up installation ID + installationID, ok := tm.installations[orgName] + if !ok { + return nil, fmt.Errorf("no GitHub App installation for org: %s", orgName) + } + + // 4. Generate JWT for GitHub App authentication + jwt, err := tm.generateJWT() + if err != nil { + return nil, fmt.Errorf("failed to generate JWT: %w", err) + } + + // 5. Request installation token + token, err := tm.exchangeJWTForInstallationToken(jwt, installationID) + if err != nil { + return nil, fmt.Errorf("failed to get installation token: %w", err) + } + + // 6. 
Cache token + tm.mu.Lock() + tm.tokenCache[cacheKey] = &cachedToken{ + token: token, + expiry: token.Expiry, + orgName: orgName, + tenantID: tenantID, + } + tm.mu.Unlock() + + return token, nil +} + +func (tm *GitHubAppTokenManager) generateJWT() (string, error) { + // Create JWT claims + now := time.Now() + claims := jwt.MapClaims{ + "iat": now.Unix(), + "exp": now.Add(10 * time.Minute).Unix(), + "iss": strconv.FormatInt(tm.appID, 10), + } + + // Sign with app private key + token := jwt.NewWithClaims(jwt.SigningMethodRS256, claims) + return token.SignedString(tm.privateKey) +} + +func (tm *GitHubAppTokenManager) exchangeJWTForInstallationToken( + jwtToken string, + installationID int64, +) (*oauth2.Token, error) { + // POST to GitHub API + url := fmt.Sprintf( + "https://api.github.com/app/installations/%d/access_tokens", + installationID, + ) + + req, _ := http.NewRequest("POST", url, nil) + req.Header.Set("Authorization", "Bearer "+jwtToken) + req.Header.Set("Accept", "application/vnd.github+json") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var result struct { + Token string `json:"token"` + ExpiresAt time.Time `json:"expires_at"` + } + json.NewDecoder(resp.Body).Decode(&result) + + return &oauth2.Token{ + AccessToken: result.Token, + TokenType: "Bearer", + Expiry: result.ExpiresAt, + }, nil +} +``` + +#### 3. 
Cache Partitioning + +**Purpose:** Isolate cached data per tenant + +```go +func (ic *IsolationConfig) GetCachePath( + r *http.Request, + cacheRoot string, + repoURL *url.URL, +) (string, error) { + // Extract tenant ID from request context + claims, ok := oidc.GetClaimsFromContext(r.Context()) + if !ok { + return "", fmt.Errorf("no claims in context") + } + + tenantID := extractTenantFromEmail(claims.Email) + // alice@acme-corp.com → tenant_id: acme-corp + + // Construct tenant-isolated cache path + basePath := filepath.Join( + repoURL.Host, + repoURL.Path, + ) + + return filepath.Join(cacheRoot, tenantID, basePath), nil +} +``` + +### Integration with PR #7 + +**PR #7 provides the foundation:** + +```go +// In goblet-server/main.go +TokenSource: func(upstreamURL *url.URL) (*oauth2.Token, error) { + // BEFORE PR #7: Could not access upstreamURL + // AFTER PR #7: Can generate org-specific tokens + + // Extract tenant from request context (requires additional work) + tenantID := getTenantFromContext() + + // Use TokenManager to get org-specific token + return tokenManager.GetToken(upstreamURL, tenantID) +} +``` + +**What PR #7 enables:** +1. ✅ Upstream URL available for token generation +2. ✅ Can extract GitHub org from URL +3. ✅ Can map org → installation → token +4. ✅ Dynamic token type for GHE support + +**What still needs implementation:** +1. ❌ Pass tenant context to TokenSource +2. ❌ Implement TokenManager +3. ❌ Integrate Authorization Layer +4. ❌ Implement cache partitioning + +--- + +## Implementation Strategy + +### Phase 1: Foundation (PR #7) ✅ COMPLETE + +**Goal:** Enable upstream-aware token generation + +**Completed:** +- Modified `TokenSource` signature to accept `upstreamURL` +- Updated all token generation call sites +- Dynamic token type support (Bearer vs Basic) + +### Phase 2: Authorization Layer + +**Goal:** Enforce repository-level access control + +**Tasks:** +1. Define `Authorizer` interface +2. 
Implement `PolicyEngine` with tenant policies +3. Integrate with `http_proxy_server.go` +4. Extract and validate tenant from OIDC claims +5. Validate repo access per tenant +6. Add audit logging for authorization decisions + +**Estimated Effort:** 2-3 weeks + +**Files to modify:** +- `http_proxy_server.go` - Add authorization check +- `auth/authz/` - New package for authorization +- `auth/oidc/authorizer.go` - Extract tenant ID + +### Phase 3: Token Manager with GitHub App Support + +**Goal:** Implement org-specific token generation + +**Tasks:** +1. Implement `TokenManager` interface +2. Create `GitHubAppTokenManager` with JWT signing +3. Configuration for GitHub App (app_id, private_key, installations) +4. Token caching with automatic refresh +5. Integration with `TokenSource` function +6. Support for fallback to PATs + +**Estimated Effort:** 3-4 weeks + +**Files to modify:** +- `auth/tokens/` - New package for token management +- `goblet-server/main.go` - Configure TokenManager +- Configuration file - Add GitHub App settings + +### Phase 4: Cache Partitioning + +**Goal:** Isolate cache per tenant + +**Tasks:** +1. Integrate isolation.go into main codebase +2. Modify cache path generation to include tenant ID +3. Update all cache read/write operations +4. Migration tool for existing cache +5. Testing with multiple tenants + +**Estimated Effort:** 2-3 weeks + +**Files to modify:** +- `managed_repository.go` - Use partitioned cache paths +- `isolation.go` - Move from prototype to pkg/ +- Cache migration tool + +### Phase 5: Security Hardening + +**Goal:** Production-ready security + +**Tasks:** +1. Secure token storage (encrypted at rest) +2. Token rotation automation +3. Comprehensive audit logging +4. Rate limiting per tenant +5. 
Security testing and penetration testing + +**Estimated Effort:** 2-3 weeks + +--- + +## Tradeoffs and Recommendations + +### Token Management Strategies + +#### Option 1: GitHub Apps (RECOMMENDED) + +**Pros:** +- ✅ Automatic token rotation (1 hour expiry) +- ✅ Organization-scoped isolation +- ✅ Fine-grained repository permissions +- ✅ Audit trail per installation +- ✅ No user credentials required + +**Cons:** +- Requires the GitHub App to be installed in each tenant's organization +- Complex setup (JWT signing, private key management) +- Rate limits per installation (5000 req/hour) + +**Use Case:** Multi-tenant SaaS with many organizations + +**Recommendation:** **Primary choice for production multi-tenant deployments** + +#### Option 2: Fine-Grained PATs + +**Pros:** +- ✅ Repository-specific permissions +- ✅ Simple to generate +- ✅ Per-organization control + +**Cons:** +- ❌ Manual rotation (max 1 year) +- ❌ User-scoped (not org-scoped) +- ❌ Requires secure storage +- ❌ Must be generated per tenant + +**Use Case:** Small number of tenants with manual management + +**Recommendation:** **Fallback option or development/testing** + +#### Option 3: Classic PATs + +**Pros:** +- ✅ Simplest to implement +- ✅ Works with GHE (with Basic auth) + +**Cons:** +- ❌ Broad permissions +- ❌ Manual rotation +- ❌ No tenant isolation + +**Use Case:** Single-tenant deployments, GHE legacy + +**Recommendation:** **Not recommended for multi-tenant** + +### Cache Isolation Strategies + +#### Option 1: Tenant-Partitioned Cache (RECOMMENDED) + +**Design:** +``` +/cache/ + ├── tenant-acme/ + │ └── github.com/acme-corp/repo1 + └── tenant-mega/ + └── github.com/megacorp/repo2 +``` + +**Pros:** +- ✅ Perfect isolation +- ✅ Simple to implement +- ✅ Easy to audit + +**Cons:** +- Higher storage usage (no sharing) +- Duplicate data for public repos + +**Recommendation:** **Primary choice for security-critical deployments** + +#### Option 2: Shared Cache with ACL + +**Design:** +``` +/cache/ + └── github.com/acme-corp/repo1 + 
+ ACL: [tenant-acme] +``` + +**Pros:** +- Lower storage usage +- Shared public repos + +**Cons:** +- ❌ Complex ACL management +- ❌ Risk of ACL misconfiguration +- ❌ Performance overhead + +**Recommendation:** **Not recommended - complexity outweighs benefits** + +### Deployment Patterns + +#### Pattern 1: Sidecar with GitHub Apps + +**Architecture:** +``` +Tenant A Namespace + └── Pod + ├── App Container + └── Goblet Sidecar + - GitHub App Installation: acme-corp + - Cache: /cache/tenant-a/ + +Tenant B Namespace + └── Pod + ├── App Container + └── Goblet Sidecar + - GitHub App Installation: megacorp + - Cache: /cache/tenant-b/ +``` + +**Security:** ✅ Excellent (namespace + network + cache isolation) +**Cost:** High (separate Goblet per tenant) +**Complexity:** Low + +**Recommendation:** **Best for regulated industries, compliance requirements** + +#### Pattern 2: Shared Goblet with Authorization Layer + +**Architecture:** +``` + Shared Goblet Instance + | + +-----------------+------------------+ + | | + Tenant A Tenant B + (GitHub App: acme) (GitHub App: mega) + (Cache: /cache/tenant-a/) (Cache: /cache/tenant-b/) +``` + +**Security:** ✅ Good (authorization + cache partitioning) +**Cost:** Low (single instance) +**Complexity:** High (requires PR #7 + full implementation) + +**Recommendation:** **Best for SaaS platforms with cost optimization** + +--- + +## Security Considerations + +### Token Security + +**Storage:** +- Use Kubernetes Secrets for GitHub App private keys +- Encrypt tokens at rest +- Never log tokens (redact in logs) + +**Rotation:** +- GitHub App tokens auto-rotate (1 hour) +- PATs require manual rotation +- Monitor token expiry and renew proactively + +**Scope:** +- Minimum required permissions +- Repository-level granularity where possible +- Regular permission audits + +### Audit Requirements + +**Log all:** +1. Authentication attempts (success/failure) +2. Authorization decisions (allow/deny with reason) +3. Token generation events +4. 
Cache access patterns +5. Upstream fetch operations + +**Retention:** +- Security logs: 1 year minimum +- Access logs: 90 days minimum +- Compliance: per industry requirements (7 years for financial) + +### Rate Limiting + +**Per-tenant limits:** +- Requests per second: 100 +- Concurrent operations: 50 +- Cache storage: 100GB + +**GitHub API limits:** +- GitHub Apps: 5000 req/hour per installation +- PATs: 5000 req/hour per token +- Implement backoff and retry + +--- + +## Migration Path + +### Phase 0: Current State + +```go +// Single token for everything +TokenSource: oauth2.StaticTokenSource(&oauth2.Token{ + AccessToken: "ghp_single_token", + TokenType: "Bearer", +}) +``` + +### Phase 1: PR #7 Deployed (URL-Aware Tokens) + +```go +// Can now use URL to select token +TokenSource: func(upstreamURL *url.URL) (*oauth2.Token, error) { + // Extract org from URL + org := extractOrg(upstreamURL) + + // Simple map-based selection + tokens := map[string]string{ + "acme-corp": "ghp_acme_token", + "megacorp": "ghp_mega_token", + } + + token := tokens[org] + if token == "" { + token = "ghp_default_token" + } + + return &oauth2.Token{ + AccessToken: token, + TokenType: "Bearer", + }, nil +} +``` + +**Benefits:** +- ✅ Org-specific tokens +- ✅ GHE support with Basic auth + +**Limitations:** +- ❌ No tenant validation +- ❌ No cache isolation +- ❌ Manual token management + +### Phase 2: Authorization Layer Added + +```go +// In http_proxy_server.go +func (s *httpProxyServer) ServeHTTP(w http.ResponseWriter, r *http.Request) { + // Step 1: Authenticate (existing) + if err := s.config.RequestAuthorizer(r); err != nil { + // Unauthenticated + return + } + + // Step 2: Authorize (NEW) + claims, _ := oidc.GetClaimsFromContext(r.Context()) + repoURL, _ := extractRepoURL(r) + + decision, err := authorizationEngine.ValidateAccess(r.Context(), claims, repoURL) + if err != nil || !decision.Allowed { + http.Error(w, "Forbidden", http.StatusForbidden) + return + } + + // Step 3: Store tenant 
context for later use + ctx := context.WithValue(r.Context(), tenantKey, decision.TenantID) + *r = *r.WithContext(ctx) + + // Continue with request... +} +``` + +### Phase 3: GitHub App Token Manager + +```go +// In goblet-server/main.go +tokenManager := &GitHubAppTokenManager{ + appID: 123456, + privateKey: loadPrivateKey(), + installations: map[string]int64{ + "acme-corp": 111, + "megacorp": 222, + }, +} + +config := &goblet.ServerConfig{ + TokenSource: func(upstreamURL *url.URL) (*oauth2.Token, error) { + // Get tenant from request context + tenantID := getTenantFromRequestContext() + + // TokenManager handles org mapping + caching + return tokenManager.GetToken(upstreamURL, tenantID) + }, +} +``` + +### Phase 4: Cache Partitioning + +```go +// In managed_repository.go +func openManagedRepository(config *ServerConfig, u *url.URL) (*managedRepository, error) { + // Get tenant from request context + tenantID := getTenantFromContext() + + // Generate tenant-partitioned cache path + cachePath := filepath.Join( + config.LocalDiskCacheRoot, + tenantID, + u.Host, + u.Path, + ) + + // Rest of implementation... +} +``` + +--- + +## Conclusion + +### Summary + +PR #7 provides **critical foundational changes** for multi-tenant GitHub authentication: + +1. **Upstream URL-aware token generation** enables org-specific tokens +2. **Dynamic token type support** enables GitHub Enterprise compatibility +3. **Foundation for TokenManager** that can implement complex token strategies + +However, PR #7 alone is **not sufficient** for secure multi-tenant operation. Complete implementation requires: + +1. **Authorization Layer** - Enforce repo-level access control +2. **Token Manager** - Implement GitHub App token generation with caching +3. **Cache Partitioning** - Isolate cached data per tenant +4. **Audit Logging** - Track all access and authorization decisions + +### Recommended Implementation + +**For production multi-tenant deployments:** + +1. 
**Deploy PR #7** - Foundation for all other work +2. **Implement Authorization Layer** (Phase 2) - Critical for security +3. **Deploy GitHub Apps** (Phase 3) - Best token management +4. **Enable Cache Partitioning** (Phase 4) - Complete isolation +5. **Security Hardening** (Phase 5) - Production readiness + +**Estimated Timeline:** 12-16 weeks for complete implementation + +**Alternative for Faster Deployment:** +- Use sidecar pattern (one Goblet per tenant) +- Simpler setup, higher cost +- Can migrate to shared model later + +### Risk Assessment + +**Without Full Implementation:** +- 🔴 **High:** Cross-tenant cache access (CVSS 8.1) +- 🔴 **High:** Token misuse across organizations +- 🟡 **Medium:** Manual token rotation burden +- 🟡 **Medium:** No audit trail for compliance + +**With PR #7 + Full Implementation:** +- ✅ **Secure:** Complete tenant isolation +- ✅ **Compliant:** Full audit trail +- ✅ **Scalable:** Automatic token management +- ✅ **Maintainable:** Clear security boundaries + +--- + +## References + +1. [GitHub Apps Documentation](https://docs.github.com/en/apps) +2. [GitHub OAuth2 Token Types](https://docs.github.com/en/authentication) +3. [GitHub Enterprise Authentication](https://docs.github.com/en/enterprise-server/authentication) +4. [RFC-001: Secure Multi-Tenant Cache](secure-multi-tenant-rfc.md) +5. [PR #7: Upstream Authentication Improvements](https://github.com/jrepp/github-cache-daemon/pull/7) +6. [Isolation Strategies](../security/isolation-strategies.md) + +--- + +## Appendix A: GitHub API Endpoints + +### GitHub App Authentication + +```bash +# 1. Create JWT +JWT=$(generate_jwt $APP_ID $PRIVATE_KEY) + +# 2. List installations +curl -H "Authorization: Bearer $JWT" \ + -H "Accept: application/vnd.github+json" \ + https://api.github.com/app/installations + +# 3. 
Get installation token +curl -X POST \ + -H "Authorization: Bearer $JWT" \ + -H "Accept: application/vnd.github+json" \ + https://api.github.com/app/installations/$INSTALLATION_ID/access_tokens +``` + +### Token Permissions + +**GitHub App - Minimum Permissions:** +```json +{ + "permissions": { + "contents": "read", + "metadata": "read" + }, + "repositories": ["repo1", "repo2"] +} +``` + +**Fine-Grained PAT - Minimum Permissions:** +``` +Repository access: Selected repositories +Permissions: + - Contents: Read-only + - Metadata: Read-only +``` + +## Appendix B: Configuration Examples + +### GitHub App Configuration + +```yaml +# goblet-config.yaml +github: + app_id: 123456 + private_key_path: /secrets/github-app.pem + installations: + - org: acme-corp + installation_id: 111 + - org: megacorp + installation_id: 222 + +tenants: + - id: tenant-acme + name: "Acme Corporation" + github_orgs: ["acme-corp"] + allowed_repos: + - "github.com/acme-corp/*" + + - id: tenant-mega + name: "MegaCorp Industries" + github_orgs: ["megacorp"] + allowed_repos: + - "github.com/megacorp/*" +``` + +### Environment Variables + +```bash +# GitHub App +export GITHUB_APP_ID=123456 +export GITHUB_APP_PRIVATE_KEY_PATH=/secrets/github-app.pem + +# OIDC +export OIDC_ISSUER_URL=https://auth.example.com +export OIDC_CLIENT_ID=goblet-cache + +# Cache +export CACHE_ROOT=/cache +export CACHE_ISOLATION_MODE=tenant +``` From 303940055a73f758d3c2745c0f52f7e85f737f60 Mon Sep 17 00:00:00 2001 From: Jacob Repp Date: Fri, 7 Nov 2025 02:23:30 -0800 Subject: [PATCH 7/9] docs: update repository references to goblet Update repository URLs from github-cache-daemon to goblet to match upstream naming convention. 
Changes: - Updated RFC-002 PR link reference - Updated CHANGELOG unreleased comparison link --- CHANGELOG.md | 2 +- docs/architecture/rfc-002-github-oauth-multi-tenancy.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78b2b07..6cbd3f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,4 +44,4 @@ When creating a new release, copy the following template and fill in the details - Security-related changes and fixes ``` -[Unreleased]: https://github.com/jrepp/github-cache-daemon/compare/main...HEAD +[Unreleased]: https://github.com/jrepp/goblet/compare/main...HEAD diff --git a/docs/architecture/rfc-002-github-oauth-multi-tenancy.md b/docs/architecture/rfc-002-github-oauth-multi-tenancy.md index 3b27ccd..a5fbaf8 100644 --- a/docs/architecture/rfc-002-github-oauth-multi-tenancy.md +++ b/docs/architecture/rfc-002-github-oauth-multi-tenancy.md @@ -1112,7 +1112,7 @@ However, PR #7 alone is **not sufficient** for secure multi-tenant operation. Co 2. [GitHub OAuth2 Token Types](https://docs.github.com/en/authentication) 3. [GitHub Enterprise Authentication](https://docs.github.com/en/enterprise-server/authentication) 4. [RFC-001: Secure Multi-Tenant Cache](secure-multi-tenant-rfc.md) -5. [PR #7: Upstream Authentication Improvements](https://github.com/jrepp/github-cache-daemon/pull/7) +5. [PR #7: Upstream Authentication Improvements](https://github.com/jrepp/goblet/pull/7) 6. 
[Isolation Strategies](../security/isolation-strategies.md) --- From 4b29a4692774791f7dbe4f07acd4ddf8aa9c668d Mon Sep 17 00:00:00 2001 From: Jacob Repp Date: Fri, 7 Nov 2025 07:13:50 -0800 Subject: [PATCH 8/9] fix: format code and remove unused grpc-gateway dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Format goblet-server/main.go with gofmt - Remove github.com/grpc-ecosystem/grpc-gateway dependency (no longer used) - Clean up go.sum entries for unused dependencies These changes ensure CI passes with fmt-check and tidy-check. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- go.mod | 1 - go.sum | 8 -------- goblet-server/main.go | 8 ++++---- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/go.mod b/go.mod index e26ee0a..5a2a804 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,6 @@ require ( github.com/go-git/go-git/v5 v5.16.3 github.com/google/gitprotocolio v0.0.0-20210704173409-b5a56823ae52 github.com/google/uuid v1.6.0 - github.com/grpc-ecosystem/grpc-gateway v1.16.0 github.com/minio/minio-go/v7 v7.0.97 go.opencensus.io v0.24.0 golang.org/x/oauth2 v0.32.0 diff --git a/go.sum b/go.sum index 30a1e04..e5c31b6 100644 --- a/go.sum +++ b/go.sum @@ -84,7 +84,6 @@ github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk5 github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod 
h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/aws/aws-sdk-go v1.55.8 h1:JRmEUbU52aJQZ2AjX4q4Wu7t4uZjOu71uyNmaWlUkJQ= @@ -137,7 +136,6 @@ github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfU github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= @@ -247,8 +245,6 @@ github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+ github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/googleapis/gax-go/v2 v2.15.0 h1:SyjDc1mGgZU5LncH8gimWo9lW1DtIfPibOG81vgd/bo= github.com/googleapis/gax-go/v2 v2.15.0/go.mod h1:zVVkkxAQHa1RQpg9z2AUCMnKhi0Qld9rcmyfL1OZhoc= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= @@ -355,7 +351,6 @@ github.com/prometheus/prometheus v0.307.3 h1:zGIN3EpiKacbMatcUL2i6wC26eRWXdoXfNP github.com/prometheus/prometheus v0.307.3/go.mod h1:sPbNW+KTS7WmzFIafC3Inzb6oZVaGLnSvwqTdz2jxRQ= github.com/prometheus/statsd_exporter 
v0.22.7 h1:7Pji/i2GuhK6Lu7DHrtTkFmNBCudCPT1pX2CziuyQR0= github.com/prometheus/statsd_exporter v0.22.7/go.mod h1:N/TevpjkIh9ccs6nuzY3jQn9dFqnUakOjnEuMPJJJnI= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= @@ -677,7 +672,6 @@ google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfG google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= @@ -702,7 +696,6 @@ google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKa google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= 
google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A= google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c= @@ -733,7 +726,6 @@ gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/goblet-server/main.go b/goblet-server/main.go index e37b089..5b51c61 100644 --- a/goblet-server/main.go +++ b/goblet-server/main.go @@ -331,10 +331,10 @@ func main() { } config := &goblet.ServerConfig{ - LocalDiskCacheRoot: *cacheRoot, - URLCanonializer: urlCanonicalizer, - RequestAuthorizer: authorizer, - TokenSource: func(upstreamURL *url.URL) (*oauth2.Token, error) { + LocalDiskCacheRoot: *cacheRoot, + URLCanonializer: urlCanonicalizer, + RequestAuthorizer: authorizer, + TokenSource: func(upstreamURL *url.URL) (*oauth2.Token, error) { return ts.Token() }, ErrorReporter: er, From 9608e204d0d39d052b7645f5f3f39ee3c039a0e7 Mon Sep 17 00:00:00 2001 From: Jacob Repp Date: Fri, 7 Nov 2025 07:15:44 -0800 Subject: [PATCH 9/9] fix: update test code for new TokenSource signature and address linting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update test_proxy_server.go to use new TokenSource function signature - Add nolint directive for gRPC to HTTP status mapping function (inherently complex switch statement with 15 cases) All CI checks now pass locally. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- reporting.go | 1 + testing/test_proxy_server.go | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/reporting.go b/reporting.go index ccee1c0..3414fc2 100644 --- a/reporting.go +++ b/reporting.go @@ -177,6 +177,7 @@ func (w *monitoringWriter) Header() http.Header { return w.w.Header() } +//nolint:gocyclo // Switch statement for gRPC to HTTP status mapping is inherently complex func httpStatusFromCode(code codes.Code) int { switch code { case codes.OK: diff --git a/testing/test_proxy_server.go b/testing/test_proxy_server.go index 6ff49b4..c1b2c00 100644 --- a/testing/test_proxy_server.go +++ b/testing/test_proxy_server.go @@ -90,9 +90,11 @@ func NewTestServer(config *TestServerConfig) *TestServer { LocalDiskCacheRoot: dir, URLCanonializer: s.testURLCanonicalizer, RequestAuthorizer: config.RequestAuthorizer, - TokenSource: config.TokenSource, - ErrorReporter: config.ErrorReporter, - RequestLogger: config.RequestLogger, + TokenSource: func(upstreamURL *url.URL) (*oauth2.Token, error) { + return config.TokenSource.Token() + }, + ErrorReporter: config.ErrorReporter, + RequestLogger: config.RequestLogger, } // Set upstream enabled status using thread-safe method if config.UpstreamEnabled != nil {