diff --git a/generators/github/error.go b/generators/github/error.go index dc2ecf84..0d6797e5 100644 --- a/generators/github/error.go +++ b/generators/github/error.go @@ -9,6 +9,7 @@ import ( const ( ErrGenerateGitHubPackageCode = "meshkit-11139" ErrInvalidGitHubSourceURLCode = "meshkit-11140" + ErrGetGithubDefaultBranchCode = "meshkit-11510" ) func ErrGenerateGitHubPackage(err error, pkgName string) error { @@ -18,3 +19,7 @@ func ErrGenerateGitHubPackage(err error, pkgName string) error { func ErrInvalidGitHubSourceURL(err error) error { return errors.New(ErrInvalidGitHubSourceURLCode, errors.Alert, []string{}, []string{err.Error()}, []string{"sourceURL provided might be invalid", "provided repo/version tag does not exist"}, []string{"ensure source url follows the format: git://////"}) } + +func ErrGetDefaultBranch(err error, owner, repo string) error { + return errors.New(ErrGetGithubDefaultBranchCode, errors.Alert, []string{fmt.Sprintf("Error while getting default branch for %s/%s", owner, repo)}, []string{err.Error()}, []string{"The repository might not exist"}, []string{}) +} diff --git a/generators/github/git_repo.go b/generators/github/git_repo.go index a57c6217..4958c3d4 100644 --- a/generators/github/git_repo.go +++ b/generators/github/git_repo.go @@ -3,6 +3,7 @@ package github import ( "bufio" "fmt" + "net/http" "net/url" "os" "path/filepath" @@ -20,7 +21,8 @@ type GitRepo struct { PackageName string } -// Assumpations: 1. Always a K8s manifest +// Assumptions: +// 1. Always a K8s manifest // 2. Unzipped/unarchived File type func (gr GitRepo) GetContent() (models.Package, error) { @@ -78,17 +80,61 @@ func (gr GitRepo) GetContent() (models.Package, error) { } func (gr GitRepo) extractRepoDetailsFromSourceURL() (owner, repo, branch, root string, err error) { - parts := strings.SplitN(strings.TrimPrefix(gr.URL.Path, "/"), "/", 4) + // Trim slashes from both ends to handle "owner/repo/" correctly + path := strings.Trim(gr.URL.Path, "/") + + // If path is empty after trimming, it's definitely invalid + if path == "" { + return "", "", "", "", ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid", gr.URL.String())) + } + + // Split the path into parts + raw := strings.Split(path, "/") + parts := make([]string, 0) + for _, p := range raw { + if p != "" { + parts = append(parts, p) + } + } + // Handle GitHub Web UI URLS: + // Pattern: owner/repo/tree/branch/... or owner/repo/blob/branch/... + if len(parts) >= 3 && (parts[2] == "tree" || parts[2] == "blob") { + // Remove 'tree' or 'blob' so the rest of the logic sees [owner, repo, branch, root] + parts = append(parts[:2], parts[3:]...) + } + + // Handle Release URLs: + // Pattern: owner/repo/releases/tag/v1.0 + if len(parts) >= 5 && parts[2] == "releases" && parts[3] == "tag" { + owner, repo = parts[0], parts[1] + branch = parts[4] // The tag is treated as the 'branch' for cloning purposes + root = "/**" + return owner, repo, branch, root, nil + } size := len(parts) - if size > 3 { - owner = parts[0] - repo = parts[1] - branch = parts[2] - root = parts[3] - - } else { - err = ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid, specify owner, repo, branch and filepath in the url according to the specified source url format", gr.URL.String())) + + switch { + case size >= 4: + // Use the first three for owner, repo, branch, and join the rest for root + owner, repo, branch = parts[0], parts[1], parts[2] + root = strings.Join(parts[3:], "/") + case size == 3: + owner, repo, branch = parts[0], parts[1], parts[2] + root = "/**" + case size == 2: + owner, repo = parts[0], parts[1] + // we use the default http client here + + b, err := GetDefaultBranchFromGitHub("https://api.github.com", owner, repo, http.DefaultClient) + if err != nil { + return "", "", "", "", err + } + branch = b + root = "/**" + default: + return "", "", "", "", ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid; expected owner/repo[/branch[/path]]", gr.URL.String())) } + return } diff --git a/generators/github/git_repo_branch.go b/generators/github/git_repo_branch.go new file mode 100644 index 00000000..15e707d3 --- /dev/null +++ b/generators/github/git_repo_branch.go @@ -0,0 +1,30 @@ +package github + +import ( + "encoding/json" + "fmt" + "net/http" +) + +func GetDefaultBranchFromGitHub(baseUrl, owner, repo string, client *http.Client) (string, error) { + if client == nil { + client = http.DefaultClient + } + url := fmt.Sprintf("%s/repos/%s/%s", baseUrl, owner, repo) + resp, err := client.Get(url) + if err != nil { + return "", ErrGetDefaultBranch(err, owner, repo) + } + defer resp.Body.Close() + if resp.StatusCode != 200 { + return "", ErrGetDefaultBranch(fmt.Errorf("github api: %d", resp.StatusCode), owner, repo) + } + var out struct { + DefaultBranch string `json:"default_branch"` + } + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", ErrGetDefaultBranch(err, owner, repo) + } + return out.DefaultBranch, nil + +} diff --git a/generators/github/git_repo_branch_test.go b/generators/github/git_repo_branch_test.go new file mode 100644 index 00000000..ca9e0f3f --- /dev/null +++ b/generators/github/git_repo_branch_test.go @@ -0,0 +1,73 @@ +package github + +import ( + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "testing" +) + +func TestGetGithubRepoBranch(t *testing.T) { + tests := []struct { + name string + owner string + repo string + mockStatus int + mockBody any // Using any allows us to pass structs or raw strings + wantBranch string + wantErr bool + }{ + { + name: "valid repo", + owner: "octocat", + repo: "Hello-World", + mockStatus: http.StatusOK, + mockBody: map[string]string{"default_branch": "main"}, + wantBranch: "main", + wantErr: false, + }, + { + name: "valid repo with master branch", + owner: "octocat", + repo: "Hello-Mesh", + mockStatus: http.StatusOK, + mockBody: map[string]string{"default_branch": "master"}, + wantBranch: "master", + wantErr: false, + }, + { + name: "repo not found", + owner: "octocat", + repo: "NonExistentRepo", + mockStatus: http.StatusNotFound, + mockBody: map[string]string{"message": "Not Found"}, + wantBranch: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + expectedPath := fmt.Sprintf("/repos/%s/%s", tt.owner, tt.repo) + if r.URL.Path != expectedPath { + t.Errorf("expected path %s, got %s", expectedPath, r.URL.Path) + } + + w.WriteHeader(tt.mockStatus) + json.NewEncoder(w).Encode(tt.mockBody) + })) + defer server.Close() + + branch, err := GetDefaultBranchFromGitHub(server.URL, tt.owner, tt.repo, server.Client()) + + if (err != nil) != tt.wantErr { + t.Fatalf("GetDefaultBranchFromGitHub() unexpected error state: %v", err) + } + if branch != tt.wantBranch { + t.Errorf("got branch %q, want %q", branch, tt.wantBranch) + } + }) + } +} diff --git a/generators/github/git_repo_test.go b/generators/github/git_repo_test.go new file mode 100644 index 00000000..b26a476e --- /dev/null +++ b/generators/github/git_repo_test.go @@ -0,0 +1,145 @@ +package github + +import ( + "net/url" + "testing" +) + +func TestExtractRepoDetailsFromSourceURL(t *testing.T) { + tests := []struct { + name string + input string + wantOwner string + wantRepo string + wantBranch string + wantRoot string + wantErr bool + }{ + { + name: "owner and repo only", + input: "git://github.com/meshery/meshkit", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", + wantRoot: "/**", + wantErr: false, + }, + { + name: "owner repo and branch", + input: "git://github.com/meshery/meshkit/master", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", + wantRoot: "/**", + wantErr: false, + }, + { + name: "owner repo branch and path", + input: "git://github.com/meshery/meshkit/master/install/kubernetes", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", + wantRoot: "install/kubernetes", + wantErr: false, + }, + { + name: "invalid single component", + input: "git://github.com/meshery", + wantErr: true, + }, + { + name: "trailing slash on repo", + input: "git://github.com/meshery/meshkit/", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", + wantRoot: "/**", + wantErr: false, + }, + // AFter gemini review these should gail + { + name: "trailing slash triggers default branch", + input: "git://github.com/meshery/meshkit/", // Trailing slash + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", // The old code would return "" here + wantRoot: "/**", + wantErr: false, + }, + { + name: "nested path depth greater than 4", + input: "git://github.com/meshery/meshkit/master/install/kubernetes/helm/meshery", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", + wantRoot: "install/kubernetes/helm/meshery", // The old code would cut this off + wantErr: false, + }, + { + name: "multiple slashes in path", + input: "git://github.com/meshery/meshkit///master/", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", + wantRoot: "/**", + wantErr: false, + }, + { + name: "GitHub Browser URL - Tree", + input: "https://github.com/meshery/meshkit/tree/master/install", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", + wantRoot: "install", + wantErr: false, + }, + { + name: "GitHub Browser URL - Blob", + input: "https://github.com/meshery/meshkit/blob/master/models/meshmodel.yaml", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "master", + wantRoot: "models/meshmodel.yaml", + wantErr: false, + }, + { + name: "GitHub Release URL", + input: "https://github.com/meshery/meshkit/releases/tag/v0.6.0", + wantOwner: "meshery", + wantRepo: "meshkit", + wantBranch: "v0.6.0", + wantRoot: "/**", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + u, err := url.Parse(tt.input) + if err != nil { + t.Fatalf("failed to parse url %s: %v", tt.input, err) + } + gr := GitRepo{URL: u} + owner, repo, branch, root, err := gr.ExtractRepoDetailsFromSourceURL() + if (err != nil) != tt.wantErr { + t.Fatalf("unexpected error state: got err=%v, wantErr=%v", err, tt.wantErr) + } + if err != nil { + // on error we are done + return + } + if owner != tt.wantOwner { + t.Fatalf("owner: got %q want %q", owner, tt.wantOwner) + } + if repo != tt.wantRepo { + t.Fatalf("repo: got %q want %q", repo, tt.wantRepo) + } + if branch != tt.wantBranch { + t.Fatalf("branch: got %q want %q", branch, tt.wantBranch) + } + if root != tt.wantRoot { + t.Fatalf("root: got %q want %q", root, tt.wantRoot) + } + }) + } +} diff --git a/generators/github/package_manager.go b/generators/github/package_manager.go index df79bcae..4db430b0 100644 --- a/generators/github/package_manager.go +++ b/generators/github/package_manager.go @@ -3,6 +3,7 @@ package github import ( "errors" "net/url" + "strings" "github.com/meshery/meshkit/generators/models" "github.com/meshery/meshkit/utils/walker" @@ -20,6 +21,17 @@ func (ghpm GitHubPackageManager) GetPackage() (models.Package, error) { return nil, err } protocol := url.Scheme + // not all https links are URL downloaders, some are git repos + // Check if it's a GitHub Browser/Web UI link (contains /tree/ or /blob/ or /releases/) + isGitHubUI := url.Host == "github.com" && (strings.Contains(url.Path, "/tree/") || strings.Contains(url.Path, "/blob/") || strings.Contains(url.Path, "/releases/")) + // Check if it's a "Short" link (just owner/repo) + pathParts := strings.Split(strings.Trim(url.Path, "/"), "/") + isShortRepoLink := url.Host == "github.com" && len(pathParts) == 2 + + // If it's either of those, we MUST use 'git' protocol (GitRepo walker) + if (protocol == "https" || protocol == "http") && (isGitHubUI || isShortRepoLink) { + protocol = "git" + } downloader := NewDownloaderForScheme(protocol, url, ghpm.PackageName) if downloader == nil { diff --git a/generators/github/package_test.go b/generators/github/package_test.go index 374ba4c7..32a2cf02 100644 --- a/generators/github/package_test.go +++ b/generators/github/package_test.go @@ -81,6 +81,71 @@ func TestGenerateCompFromGitHub(t *testing.T) { }, want: 1, }, + + // This test case is to solve the ExtractZip function's root path handling + { // Source pointing to a zip containing CRDs + ghPackageManager: GitHubPackageManager{ + PackageName: "acm-controller", + SourceURL: "git://github.com/muhammadolammi/meshcrds/main/crds.zip", + }, + want: 2, + }, + // This test case is for the feature where root is empty in the source URL + { // Source pointing to a git with just branch containing crds in root and a zip + ghPackageManager: GitHubPackageManager{ + PackageName: "acm-controller", + SourceURL: "git://github.com/muhammadolammi/meshcrds/main/", + }, + want: 5, + }, + // This test case is for the feature where root and branch is empty in the source URL + { // Source pointing to a git with no branch containing crds in root and a zip + ghPackageManager: GitHubPackageManager{ + PackageName: "acm-controller", + SourceURL: "git://github.com/muhammadolammi/meshcrds", + }, + want: 5, + }, + // github.com verisons of the above 3 test case + { // Source pointing to a git with just branch containing crds in root and a zip + ghPackageManager: GitHubPackageManager{ + PackageName: "acm-controller", + SourceURL: "https://github.com/muhammadolammi/meshcrds/tree/main/crds.zip", + }, + want: 2, + }, + + // This test case is for the feature where root is empty in the source URL + { // Source pointing to a git with just branch containing crds in root and a zip + ghPackageManager: GitHubPackageManager{ + PackageName: "acm-controller", + SourceURL: "https://github.com/muhammadolammi/meshcrds/tree/main/", + }, + want: 5, + }, + // This test case is for the feature where root and branch is empty in the source URL + { // Source pointing to a git with no branch containing crds in root and a zip + ghPackageManager: GitHubPackageManager{ + PackageName: "acm-controller", + SourceURL: "https://github.com/muhammadolammi/meshcrds", + }, + want: 5, + }, + // Testing GitHub Release Web UI URL + { + ghPackageManager: GitHubPackageManager{ + PackageName: "acm-controller-release", + SourceURL: "https://github.com/muhammadolammi/meshcrds/releases/tag/v1.0", + }, + want: 5, // Assuming v1.0 contains the 5 CRDs + }, + { // Source pointing to a specific file via GitHub Web UI (Blob) + ghPackageManager: GitHubPackageManager{ + PackageName: "single-crd-blob", + SourceURL: "https://github.com/muhammadolammi/meshcrds/blob/main/y1.yml", + }, + want: 1, + }, } for _, test := range tests { @@ -97,8 +162,8 @@ func TestGenerateCompFromGitHub(t *testing.T) { t.Errorf("error while generating components: %v", err) return } + currentDirectory := t.TempDir() for _, comp := range comps { - currentDirectory, _ := os.Getwd() if comp.Model == nil { t.Errorf("component %s has nil Model", comp.Component.Kind) continue @@ -113,7 +178,6 @@ func TestGenerateCompFromGitHub(t *testing.T) { } } byt, _ := json.MarshalIndent(comp, "", "") - f, err := os.Create(fmt.Sprintf("%s/%s%s", dirName, comp.Component.Kind, ".json")) if err != nil { t.Errorf("error creating file for %s: %v", comp.Component.Kind, err) @@ -121,11 +185,14 @@ func TestGenerateCompFromGitHub(t *testing.T) { } _, _ = f.Write(byt) } + t.Log("generated ", len(comps), "want: ", test.want) + if len(comps) != test.want { t.Errorf("generated %d, want %d", len(comps), test.want) return } + }) } } diff --git a/utils/unarchive.go b/utils/unarchive.go index 4e729cae..fe359f45 100644 --- a/utils/unarchive.go +++ b/utils/unarchive.go @@ -4,6 +4,7 @@ import ( "archive/tar" "archive/zip" "compress/gzip" + "fmt" "io" "net/http" "os" @@ -72,49 +73,58 @@ func readData(name string) (buffer []byte, err error) { func ExtractZip(path, artifactPath string) error { zipReader, err := zip.OpenReader(artifactPath) - defer func() { - _ = zipReader.Close() - }() - if err != nil { return ErrExtractZip(err, path) } - buffer := make([]byte, 1<<4) + defer zipReader.Close() + destDir, err := filepath.Abs(path) + if err != nil { + return ErrExtractZip(err, path) + } for _, file := range zipReader.File { + err := func() error { + targetPath, err := filepath.Abs(filepath.Join(destDir, file.Name)) + if err != nil { + return err + } - fd, err := file.Open() - defer func() { - _ = fd.Close() - }() - - if err != nil { - return ErrExtractZip(err, path) - } + if !strings.HasPrefix(targetPath, destDir+string(os.PathSeparator)) && targetPath != destDir { + return fmt.Errorf("zipslip: illegal file path: %s", file.Name) + } - filePath := filepath.Join(path, file.Name) + // CHECK for files to skip (macOS metadata) + if strings.HasPrefix(filepath.Base(targetPath), "._") || filepath.Base(targetPath) == "__MACOSX" { + return nil + } - if file.FileInfo().IsDir() { - err := os.Mkdir(file.Name, file.Mode()) + fd, err := file.Open() if err != nil { - return ErrExtractZip(err, path) + return err } - } else { - openedFile, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, file.Mode()) - if err != nil { - return ErrExtractZip(err, path) + defer fd.Close() + + if file.FileInfo().IsDir() { + return os.MkdirAll(targetPath, file.Mode()) + } + + if err := os.MkdirAll(filepath.Dir(targetPath), 0755); err != nil { + return err } - _, err = io.CopyBuffer(openedFile, fd, buffer) + + openedFile, err := os.OpenFile(targetPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, file.Mode()) if err != nil { - return ErrExtractZip(err, path) + return err } - defer func() { - _ = openedFile.Close() - }() - } + defer openedFile.Close() + _, err = io.Copy(openedFile, fd) + return err + }() + if err != nil { + return ErrExtractZip(err, path) + } } return nil - } func ExtractTarGz(path, downloadfilePath string) error { diff --git a/utils/zip_test.go b/utils/zip_test.go new file mode 100644 index 00000000..145cac3a --- /dev/null +++ b/utils/zip_test.go @@ -0,0 +1,108 @@ +package utils + +import ( + "archive/zip" + "os" + "path/filepath" + "testing" +) + +func TestExtractZip(t *testing.T) { + tests := []struct { + name string + files map[string]string // fileName: content + wantErr bool + errMatch string + }{ + { + name: "Valid Extraction", + files: map[string]string{ + "test.txt": "hello world", + "subdir/file.txt": "nested content", + }, + wantErr: false, + }, + { + name: "Zip Slip Attack Attempt", + files: map[string]string{ + "../outside.txt": "malicious content", + }, + wantErr: true, + errMatch: "zipslip: illegal file path", + }, + { + name: "Skip macOS Metadata", + files: map[string]string{ + "__MACOSX/secret": "should skip", + "._metadata": "should skip", + "realfile.txt": "should keep", + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tmpDir, _ := os.MkdirTemp("", "extract-test-*") + defer os.RemoveAll(tmpDir) + + zipFile, _ := os.CreateTemp("", "test-*.zip") + defer os.Remove(zipFile.Name()) + + writer := zip.NewWriter(zipFile) + for name, content := range tt.files { + f, _ := writer.Create(name) + f.Write([]byte(content)) + } + writer.Close() + zipFile.Close() + + err := ExtractZip(tmpDir, zipFile.Name()) + + if (err != nil) != tt.wantErr { + t.Errorf("ExtractZip() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} + +func TestExtractZip_Destination(t *testing.T) { + destDir, _ := os.MkdirTemp("", "correct-dest-*") + defer os.RemoveAll(destDir) + + cwd, _ := os.Getwd() + + subDirName := "target-subfolder" + zipFile, _ := os.CreateTemp("", "test-*.zip") + writer := zip.NewWriter(zipFile) + + header := &zip.FileHeader{ + Name: subDirName + "/", + Method: zip.Store, + } + header.SetMode(0755) + _, _ = writer.CreateHeader(header) + + f, _ := writer.Create(filepath.Join(subDirName, "file.txt")) + f.Write([]byte("content")) + + writer.Close() + zipFile.Close() + defer os.Remove(zipFile.Name()) + + extractionErr := ExtractZip(destDir, zipFile.Name()) + wrongPath := filepath.Join(cwd, subDirName) + if _, err := os.Stat(wrongPath); err == nil { + t.Errorf("BUG FOUND: Folder was created in CWD: %s", wrongPath) + os.RemoveAll(wrongPath) + } + + rightPath := filepath.Join(destDir, subDirName) + if _, err := os.Stat(rightPath); os.IsNotExist(err) { + t.Errorf("FAILURE: Folder was NOT created in destination: %s", rightPath) + } + if extractionErr != nil { + t.Fatalf("Extraction failed: %v", extractionErr) + } + +}