From eb5a2ebf8d2e8ffd829137560e3e70c819de91c9 Mon Sep 17 00:00:00 2001 From: Abhijeet Dey Date: Sat, 3 Jan 2026 16:17:49 +0530 Subject: [PATCH 1/4] Added [Feature] Auto-discover CRDs from repository root URL Signed-off-by: Abhijeet Dey --- generators/github/git_repo.go | 162 ++++++++++++++++++++++++-- generators/github/scheme_interface.go | 33 ++++++ 2 files changed, 183 insertions(+), 12 deletions(-) diff --git a/generators/github/git_repo.go b/generators/github/git_repo.go index 9527d5da..ce14b148 100644 --- a/generators/github/git_repo.go +++ b/generators/github/git_repo.go @@ -50,13 +50,29 @@ func (gr GitRepo) GetContent() (models.Package, error) { _ = br.Flush() _ = fd.Close() }() + + // If root is not specified, enable recursive traversal from root to discover CRDs automatically + // This makes the generator robust to repository structure changes + rootPath := root + isAutoDiscovery := rootPath == "" + if isAutoDiscovery { + // Use "/**" to enable recursive traversal from repository root + rootPath = "/**" + } + gw := gitWalker. Owner(owner). Repo(repo). Branch(branch). - Root(root). - RegisterFileInterceptor(fileInterceptor(br)). - RegisterDirInterceptor(dirInterceptor(br)) + Root(rootPath). + RegisterFileInterceptor(crdAwareFileInterceptor(br)) + + // Register dirInterceptor to handle Helm charts which may contain CRDs + // Note: When doing automatic discovery (recurse mode), dirInterceptor processes directories + // and fileInterceptor processes files. For Helm charts, dirInterceptor extracts CRDs from + // the chart structure, while fileInterceptor finds standalone CRD files. This ensures we + // discover CRDs in both formats without missing any. + gw = gw.RegisterDirInterceptor(dirInterceptor(br)) if version != "" { gw = gw.ReferenceName(fmt.Sprintf("refs/tags/%s", version)) @@ -78,17 +94,62 @@ func (gr GitRepo) GetContent() (models.Package, error) { } func (gr GitRepo) extractRepoDetailsFromSourceURL() (owner, repo, branch, root string, err error) { - parts := strings.SplitN(strings.TrimPrefix(gr.URL.Path, "/"), "/", 4) + path := strings.TrimPrefix(gr.URL.Path, "/") + parts := strings.Split(path, "/") size := len(parts) - if size > 3 { - owner = parts[0] - repo = parts[1] - branch = parts[2] - root = parts[3] - - } else { - err = ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid, specify owner, repo, branch and filepath in the url according to the specified source url format", gr.URL.String())) + + // Minimum required: owner and repo + if size < 2 { + err = ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid, must specify at least owner and repo", gr.URL.String())) + return + } + + owner = parts[0] + repo = parts[1] + + // Remove .git suffix from repo name if present + repo = strings.TrimSuffix(repo, ".git") + + // Handle standard GitHub URL formats: + // - https://github.com/owner/repo + // - https://github.com/owner/repo/tree/branch + // - https://github.com/owner/repo/tree/branch/path/to/dir + // - git://github.com/owner/repo/branch/path (legacy format) + + branch = "main" // default branch + root = "" + + if size >= 3 { + // Check if this is a standard GitHub URL with /tree/branch format + if parts[2] == "tree" && size >= 4 { + // Format: owner/repo/tree/branch[/path...] + branch = parts[3] + if size > 4 { + // Reconstruct the path after branch + root = strings.Join(parts[4:], "/") + } + } else if parts[2] == "blob" { + // Format: owner/repo/blob/branch/path/to/file + // This is a file URL, not a directory - we'll treat it as root path + if size >= 4 { + branch = parts[3] + if size > 4 { + root = strings.Join(parts[4:], "/") + } + } + } else { + // Legacy format: owner/repo/branch[/path...] + branch = parts[2] + if size > 3 { + // Reconstruct the path after branch + root = strings.Join(parts[3:], "/") + } + } } + + // If root is empty, we'll use "/**" for recursive traversal in GetContent + // This enables automatic CRD discovery + return } @@ -96,6 +157,7 @@ func (gr GitRepo) ExtractRepoDetailsFromSourceURL() (owner, repo, branch, root s return gr.extractRepoDetailsFromSourceURL() } +// fileInterceptor processes all files (original behavior) func fileInterceptor(br *bufio.Writer) walker.FileInterceptor { return func(file walker.File) error { tempPath := filepath.Join(os.TempDir(), utils.GetRandomAlphabetsOfDigit(5)) @@ -103,6 +165,82 @@ func fileInterceptor(br *bufio.Writer) walker.FileInterceptor { } } +// crdAwareFileInterceptor only processes files that contain CRDs +// This enables automatic CRD discovery without requiring specific directory paths +func crdAwareFileInterceptor(br *bufio.Writer) walker.FileInterceptor { + return func(file walker.File) error { + // Check if the file is a YAML/JSON file that might contain CRDs + fileName := strings.ToLower(file.Name) + isYAML := strings.HasSuffix(fileName, ".yaml") || strings.HasSuffix(fileName, ".yml") + isJSON := strings.HasSuffix(fileName, ".json") + + if !isYAML && !isJSON { + // Skip non-YAML/JSON files + return nil + } + + // Check if the file content contains a CRD + // Handle both single-document and multi-document YAML files + content := file.Content + + // For multi-document YAML, split by document separator and check each + documents := strings.Split(content, "\n---\n") + // Also handle documents separated by "---" at the start of a line + if len(documents) == 1 { + // Try splitting by lines starting with "---" + lines := strings.Split(content, "\n") + var docs []string + var currentDoc strings.Builder + for _, line := range lines { + if strings.TrimSpace(line) == "---" && currentDoc.Len() > 0 { + docs = append(docs, currentDoc.String()) + currentDoc.Reset() + } else { + if currentDoc.Len() > 0 { + currentDoc.WriteString("\n") + } + currentDoc.WriteString(line) + } + } + if currentDoc.Len() > 0 { + docs = append(docs, currentDoc.String()) + } + if len(docs) > 1 { + documents = docs + } + } + + // Check each document for CRD + hasCRD := false + for _, doc := range documents { + doc = strings.TrimSpace(doc) + if doc == "" { + continue + } + // Check for YAML format + if strings.Contains(doc, "kind: CustomResourceDefinition") { + hasCRD = true + break + } + // Check for JSON format + if strings.Contains(doc, "\"kind\":\"CustomResourceDefinition\"") || + strings.Contains(doc, `"kind":"CustomResourceDefinition"`) { + hasCRD = true + break + } + } + + if !hasCRD { + // File doesn't contain a CRD, skip it + return nil + } + + // File contains a CRD, process it + tempPath := filepath.Join(os.TempDir(), utils.GetRandomAlphabetsOfDigit(5)) + return ProcessContent(br, tempPath, file.Path) + } +} + // When passing a directory to extract charts and the format introspector is provided as file/dir interceptor i.e. ConvertToK8sManifest ensure the Recurese is off. It is required othweise we will process the dir as well as process the file in that dir separately. // Add more calrifying commment and entry inside docs. func dirInterceptor(br *bufio.Writer) walker.DirInterceptor { diff --git a/generators/github/scheme_interface.go b/generators/github/scheme_interface.go index cda361e7..25d47c87 100644 --- a/generators/github/scheme_interface.go +++ b/generators/github/scheme_interface.go @@ -2,6 +2,7 @@ package github import ( "net/url" + "strings" "github.com/meshery/meshkit/generators/models" ) @@ -11,6 +12,14 @@ type DownloaderScheme interface { } func NewDownloaderForScheme(scheme string, url *url.URL, packageName string) DownloaderScheme { + // Check if this is a GitHub URL - route to GitRepo for automatic CRD discovery + if isGitHubURL(scheme, url) { + return GitRepo{ + URL: url, + PackageName: packageName, + } + } + switch scheme { case "git": return GitRepo{ @@ -27,3 +36,27 @@ func NewDownloaderForScheme(scheme string, url *url.URL, packageName string) Dow } return nil } + +// isGitHubURL checks if the URL is a GitHub repository URL +// This enables automatic CRD discovery for standard GitHub URLs +func isGitHubURL(scheme string, url *url.URL) bool { + host := strings.ToLower(url.Host) + // Check for github.com domain + if host == "github.com" || strings.HasSuffix(host, ".github.com") { + // Check if it looks like a repository URL (has at least owner/repo in path) + path := strings.TrimPrefix(url.Path, "/") + parts := strings.Split(path, "/") + // Valid GitHub repo URL should have at least owner and repo + if len(parts) >= 2 && parts[0] != "" && parts[1] != "" { + // Exclude certain paths that aren't repositories + excluded := []string{"settings", "explore", "marketplace", "pulls", "issues", "new", "organizations", "login", "join"} + for _, exclude := range excluded { + if parts[0] == exclude { + return false + } + } + return true + } + } + return false +} From a0e8d5a1e930cf0609a2d230a63f24cc28d2cda3 Mon Sep 17 00:00:00 2001 From: Abhijeet Dey Date: Sat, 10 Jan 2026 00:49:02 +0530 Subject: [PATCH 2/4] Fixed the failing check Signed-off-by: Abhijeet Dey --- .golangci.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 5a659553..72948c05 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -2,12 +2,3 @@ version: "2" run: timeout: 5m - -linters-settings: - staticcheck: - go: "1.25" - checks: ["all", "-ST1005"] - -issues: - exclude: - - "ST1005" \ No newline at end of file From 3002a3c4f7f89026b6a97f1e97ec9894f55b98aa Mon Sep 17 00:00:00 2001 From: Abhijeet Dey Date: Fri, 16 Jan 2026 10:38:41 +0530 Subject: [PATCH 3/4] done with the requested change Signed-off-by: Abhijeet Dey --- .golangci.yml | 2 - generators/github/git_repo.go | 68 +++++++++++++++------------ generators/github/scheme_interface.go | 48 +++++++++++-------- 3 files changed, 66 insertions(+), 52 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 72948c05..600bef78 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,4 +1,2 @@ -version: "2" - run: timeout: 5m diff --git a/generators/github/git_repo.go b/generators/github/git_repo.go index b4a39608..bf9af5c0 100644 --- a/generators/github/git_repo.go +++ b/generators/github/git_repo.go @@ -93,60 +93,68 @@ func (gr GitRepo) GetContent() (models.Package, error) { }, nil } -func (gr GitRepo) extractRepoDetailsFromSourceURL() (owner, repo, branch, root string, err error) { - path := strings.TrimPrefix(gr.URL.Path, "/") - parts := strings.Split(path, "/") - size := len(parts) +// parseGitURL parses a git URL and extracts owner, repo, branch, and path components +func parseGitURL(rawURL *url.URL) (owner, repo, branch, path string, err error) { + urlPath := strings.Trim(rawURL.Path, "/") + urlPath = strings.TrimSuffix(urlPath, ".git") - // Minimum required: owner and repo - if size < 2 { - err = ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid, must specify at least owner and repo", gr.URL.String())) + if urlPath == "" { + err = fmt.Errorf("empty path in URL: %s", rawURL.String()) return } + parts := strings.Split(urlPath, "/") + if len(parts) < 2 { + err = fmt.Errorf("invalid git URL format: must have at least owner/repo in path: %s", rawURL.String()) + return + } owner = parts[0] repo = parts[1] - // Remove .git suffix from repo name if present - repo = strings.TrimSuffix(repo, ".git") + // Default branch + branch = "main" + path = "" - // Handle standard GitHub URL formats: + // Handle different GitHub URL formats // - https://github.com/owner/repo // - https://github.com/owner/repo/tree/branch // - https://github.com/owner/repo/tree/branch/path/to/dir + // - https://github.com/owner/repo/blob/branch/path/to/file // - git://github.com/owner/repo/branch/path (legacy format) - branch = "main" // default branch - root = "" - - if size >= 3 { - // Check if this is a standard GitHub URL with /tree/branch format - if parts[2] == "tree" && size >= 4 { - // Format: owner/repo/tree/branch[/path...] - branch = parts[3] - if size > 4 { - // Reconstruct the path after branch - root = strings.Join(parts[4:], "/") + if len(parts) >= 3 { + if parts[2] == "tree" { + if len(parts) >= 4 { + branch = parts[3] + if len(parts) > 4 { + path = strings.Join(parts[4:], "/") + } } } else if parts[2] == "blob" { - // Format: owner/repo/blob/branch/path/to/file - // This is a file URL, not a directory - we'll treat it as root path - if size >= 4 { + if len(parts) >= 4 { branch = parts[3] - if size > 4 { - root = strings.Join(parts[4:], "/") + if len(parts) > 4 { + path = strings.Join(parts[4:], "/") } } } else { - // Legacy format: owner/repo/branch[/path...] branch = parts[2] - if size > 3 { - // Reconstruct the path after branch - root = strings.Join(parts[3:], "/") + if len(parts) > 3 { + path = strings.Join(parts[3:], "/") } } } + return owner, repo, branch, path, nil +} + +func (gr GitRepo) extractRepoDetailsFromSourceURL() (owner, repo, branch, root string, err error) { + owner, repo, branch, root, err = parseGitURL(gr.URL) + if err != nil { + err = ErrInvalidGitHubSourceURL(err) + return + } + // If root is empty, we'll use "/**" for recursive traversal in GetContent // This enables automatic CRD discovery diff --git a/generators/github/scheme_interface.go b/generators/github/scheme_interface.go index 25d47c87..e2d88159 100644 --- a/generators/github/scheme_interface.go +++ b/generators/github/scheme_interface.go @@ -37,26 +37,34 @@ func NewDownloaderForScheme(scheme string, url *url.URL, packageName string) Dow return nil } -// isGitHubURL checks if the URL is a GitHub repository URL -// This enables automatic CRD discovery for standard GitHub URLs -func isGitHubURL(scheme string, url *url.URL) bool { - host := strings.ToLower(url.Host) - // Check for github.com domain - if host == "github.com" || strings.HasSuffix(host, ".github.com") { - // Check if it looks like a repository URL (has at least owner/repo in path) - path := strings.TrimPrefix(url.Path, "/") - parts := strings.Split(path, "/") - // Valid GitHub repo URL should have at least owner and repo - if len(parts) >= 2 && parts[0] != "" && parts[1] != "" { - // Exclude certain paths that aren't repositories - excluded := []string{"settings", "explore", "marketplace", "pulls", "issues", "new", "organizations", "login", "join"} - for _, exclude := range excluded { - if parts[0] == exclude { - return false - } - } - return true +func isGitHubURL(scheme string, u *url.URL) bool { + host := strings.ToLower(u.Host) + if host != "github.com" && !strings.HasSuffix(host, ".github.com") { + return false + } + if strings.HasPrefix(host, "gist.") { + return false + } + + path := strings.Trim(u.Path, "/") + path = strings.TrimSuffix(path, ".git") + + if path == "" { + return false + } + + parts := strings.Split(path, "/") + + if len(parts) < 2 || parts[0] == "" || parts[1] == "" { + return false + } + + + excluded := []string{"settings", "explore", "marketplace", "pulls", "issues", "new", "organizations", "login", "join", "logout", "pricing", "blog"} + for _, exclude := range excluded { + if parts[0] == exclude { + return false } } - return false + return true } From 4194738f676408bba1a6c348b3bb20e00ac65ba3 Mon Sep 17 00:00:00 2001 From: Abhijeet Dey Date: Mon, 19 Jan 2026 21:44:15 +0530 Subject: [PATCH 4/4] Fixed the changes used library and gemini error fixed Signed-off-by: Abhijeet Dey --- generators/github/git_repo.go | 68 +++++++-------------------- generators/github/scheme_interface.go | 35 ++++---------- go.mod | 1 + 3 files changed, 27 insertions(+), 77 deletions(-) diff --git a/generators/github/git_repo.go b/generators/github/git_repo.go index bf9af5c0..577fdec4 100644 --- a/generators/github/git_repo.go +++ b/generators/github/git_repo.go @@ -6,8 +6,10 @@ import ( "net/url" "os" "path/filepath" + "regexp" "strings" + giturlparse "github.com/git-download-manager/git-url-parse" "github.com/meshery/meshkit/generators/models" "github.com/meshery/meshkit/utils" "github.com/meshery/meshkit/utils/helm" @@ -95,56 +97,23 @@ func (gr GitRepo) GetContent() (models.Package, error) { // parseGitURL parses a git URL and extracts owner, repo, branch, and path components func parseGitURL(rawURL *url.URL) (owner, repo, branch, path string, err error) { - urlPath := strings.Trim(rawURL.Path, "/") - urlPath = strings.TrimSuffix(urlPath, ".git") - - if urlPath == "" { - err = fmt.Errorf("empty path in URL: %s", rawURL.String()) - return + gitRepository := giturlparse.NewGitRepository("", "", rawURL.String(), "") + if err := gitRepository.Parse("", 0, ""); err != nil { + return "", "", "", "", err } - parts := strings.Split(urlPath, "/") - - if len(parts) < 2 { - err = fmt.Errorf("invalid git URL format: must have at least owner/repo in path: %s", rawURL.String()) - return + + owner = gitRepository.Owner + repo = gitRepository.Name + branch = gitRepository.Branch + if branch == "" { + branch = "main" } - owner = parts[0] - repo = parts[1] - - // Default branch - branch = "main" - path = "" - - // Handle different GitHub URL formats - // - https://github.com/owner/repo - // - https://github.com/owner/repo/tree/branch - // - https://github.com/owner/repo/tree/branch/path/to/dir - // - https://github.com/owner/repo/blob/branch/path/to/file - // - git://github.com/owner/repo/branch/path (legacy format) - - if len(parts) >= 3 { - if parts[2] == "tree" { - if len(parts) >= 4 { - branch = parts[3] - if len(parts) > 4 { - path = strings.Join(parts[4:], "/") - } - } - } else if parts[2] == "blob" { - if len(parts) >= 4 { - branch = parts[3] - if len(parts) > 4 { - path = strings.Join(parts[4:], "/") - } - } - } else { - branch = parts[2] - if len(parts) > 3 { - path = strings.Join(parts[3:], "/") - } - } + path = gitRepository.Path + + if owner == "" || repo == "" { + return "", "", "", "", fmt.Errorf("invalid git URL format: must have at least owner/repo in path: %s", rawURL.String()) } - + return owner, repo, branch, path, nil } @@ -226,13 +195,12 @@ func crdAwareFileInterceptor(br *bufio.Writer) walker.FileInterceptor { continue } // Check for YAML format - if strings.Contains(doc, "kind: CustomResourceDefinition") { + if match, _ := regexp.MatchString(`kind:\s*CustomResourceDefinition`, doc); match { hasCRD = true break } // Check for JSON format - if strings.Contains(doc, "\"kind\":\"CustomResourceDefinition\"") || - strings.Contains(doc, `"kind":"CustomResourceDefinition"`) { + if match, _ := regexp.MatchString(`"kind"\s*:\s*"CustomResourceDefinition"`, doc); match { hasCRD = true break } diff --git a/generators/github/scheme_interface.go b/generators/github/scheme_interface.go index e2d88159..56d18acd 100644 --- a/generators/github/scheme_interface.go +++ b/generators/github/scheme_interface.go @@ -2,8 +2,8 @@ package github import ( "net/url" - "strings" + giturlparse "github.com/git-download-manager/git-url-parse" "github.com/meshery/meshkit/generators/models" ) @@ -13,7 +13,7 @@ type DownloaderScheme interface { func NewDownloaderForScheme(scheme string, url *url.URL, packageName string) DownloaderScheme { // Check if this is a GitHub URL - route to GitRepo for automatic CRD discovery - if isGitHubURL(scheme, url) { + if isGitHubURL(url) { return GitRepo{ URL: url, PackageName: packageName, @@ -37,34 +37,15 @@ func NewDownloaderForScheme(scheme string, url *url.URL, packageName string) Dow return nil } -func isGitHubURL(scheme string, u *url.URL) bool { - host := strings.ToLower(u.Host) - if host != "github.com" && !strings.HasSuffix(host, ".github.com") { +func isGitHubURL(u *url.URL) bool { + gitRepository := giturlparse.NewGitRepository("", "", u.String(), "") + if err := gitRepository.Parse("", 0, ""); err != nil { return false } - if strings.HasPrefix(host, "gist.") { - return false - } - - path := strings.Trim(u.Path, "/") - path = strings.TrimSuffix(path, ".git") - - if path == "" { - return false - } - - parts := strings.Split(path, "/") - - if len(parts) < 2 || parts[0] == "" || parts[1] == "" { + + if gitRepository.Hostname != "github.com" { return false } - - excluded := []string{"settings", "explore", "marketplace", "pulls", "issues", "new", "organizations", "login", "join", "logout", "pricing", "blog"} - for _, exclude := range excluded { - if parts[0] == exclude { - return false - } - } - return true + return gitRepository.Owner != "" && gitRepository.Name != "" } diff --git a/go.mod b/go.mod index bb542625..11c763fb 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( github.com/docker/cli v27.5.1+incompatible github.com/fluxcd/pkg/oci v0.43.1 github.com/fluxcd/pkg/tar v0.10.0 + github.com/git-download-manager/git-url-parse v1.0.3 github.com/go-git/go-git/v5 v5.16.4 github.com/go-logr/logr v1.4.3 github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1