-
Notifications
You must be signed in to change notification settings - Fork 185
Improved CRD discovery to automatically scan repositories #886
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
eb5a2eb
70ff340
8f80b98
a0e8d5a
3002a3c
100a36c
4194738
c4feb04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,2 @@ | ||
| version: "2" | ||
|
|
||
| run: | ||
| timeout: 5m | ||
|
|
||
| linters-settings: | ||
| staticcheck: | ||
| go: "1.25" | ||
| checks: ["all", "-ST1005"] | ||
|
|
||
| issues: | ||
| exclude: | ||
| - "ST1005" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,8 +6,10 @@ import ( | |
| "net/url" | ||
| "os" | ||
| "path/filepath" | ||
| "regexp" | ||
| "strings" | ||
|
|
||
| giturlparse "github.com/git-download-manager/git-url-parse" | ||
| "github.com/meshery/meshkit/generators/models" | ||
| "github.com/meshery/meshkit/utils" | ||
| "github.com/meshery/meshkit/utils/helm" | ||
|
|
@@ -50,13 +52,29 @@ func (gr GitRepo) GetContent() (models.Package, error) { | |
| _ = br.Flush() | ||
| _ = fd.Close() | ||
| }() | ||
|
|
||
| // If root is not specified, enable recursive traversal from root to discover CRDs automatically | ||
| // This makes the generator robust to repository structure changes | ||
| rootPath := root | ||
| isAutoDiscovery := rootPath == "" | ||
| if isAutoDiscovery { | ||
| // Use "/**" to enable recursive traversal from repository root | ||
| rootPath = "/**" | ||
| } | ||
|
|
||
| gw := gitWalker. | ||
| Owner(owner). | ||
| Repo(repo). | ||
| Branch(branch). | ||
| Root(root). | ||
| RegisterFileInterceptor(fileInterceptor(br)). | ||
| RegisterDirInterceptor(dirInterceptor(br)) | ||
| Root(rootPath). | ||
| RegisterFileInterceptor(crdAwareFileInterceptor(br)) | ||
|
|
||
| // Register dirInterceptor to handle Helm charts which may contain CRDs | ||
| // Note: When doing automatic discovery (recurse mode), dirInterceptor processes directories | ||
| // and fileInterceptor processes files. For Helm charts, dirInterceptor extracts CRDs from | ||
| // the chart structure, while fileInterceptor finds standalone CRD files. This ensures we | ||
| // discover CRDs in both formats without missing any. | ||
| gw = gw.RegisterDirInterceptor(dirInterceptor(br)) | ||
|
|
||
| if version != "" { | ||
| gw = gw.ReferenceName(fmt.Sprintf("refs/tags/%s", version)) | ||
|
|
@@ -77,32 +95,128 @@ func (gr GitRepo) GetContent() (models.Package, error) { | |
| }, nil | ||
| } | ||
|
|
||
| // parseGitURL parses a git URL and extracts owner, repo, branch, and path components | ||
| func parseGitURL(rawURL *url.URL) (owner, repo, branch, path string, err error) { | ||
| gitRepository := giturlparse.NewGitRepository("", "", rawURL.String(), "") | ||
| if err := gitRepository.Parse("", 0, ""); err != nil { | ||
| return "", "", "", "", err | ||
| } | ||
|
|
||
| owner = gitRepository.Owner | ||
| repo = gitRepository.Name | ||
| branch = gitRepository.Branch | ||
| if branch == "" { | ||
| branch = "main" | ||
| } | ||
| path = gitRepository.Path | ||
|
|
||
| if owner == "" || repo == "" { | ||
| return "", "", "", "", fmt.Errorf("invalid git URL format: must have at least owner/repo in path: %s", rawURL.String()) | ||
| } | ||
|
|
||
| return owner, repo, branch, path, nil | ||
| } | ||
|
|
||
| func (gr GitRepo) extractRepoDetailsFromSourceURL() (owner, repo, branch, root string, err error) { | ||
| parts := strings.SplitN(strings.TrimPrefix(gr.URL.Path, "/"), "/", 4) | ||
| size := len(parts) | ||
| if size > 3 { | ||
| owner = parts[0] | ||
| repo = parts[1] | ||
| branch = parts[2] | ||
| root = parts[3] | ||
|
|
||
| } else { | ||
| err = ErrInvalidGitHubSourceURL(fmt.Errorf("Source URL %s is invalid, specify owner, repo, branch and filepath in the url according to the specified source url format", gr.URL.String())) | ||
| owner, repo, branch, root, err = parseGitURL(gr.URL) | ||
| if err != nil { | ||
| err = ErrInvalidGitHubSourceURL(err) | ||
| return | ||
| } | ||
|
|
||
| // If root is empty, we'll use "/**" for recursive traversal in GetContent | ||
| // This enables automatic CRD discovery | ||
|
|
||
| return | ||
| } | ||
|
|
||
| func (gr GitRepo) ExtractRepoDetailsFromSourceURL() (owner, repo, branch, root string, err error) { | ||
| return gr.extractRepoDetailsFromSourceURL() | ||
| } | ||
|
|
||
| // fileInterceptor processes all files (original behavior) | ||
| func fileInterceptor(br *bufio.Writer) walker.FileInterceptor { | ||
| return func(file walker.File) error { | ||
| tempPath := filepath.Join(os.TempDir(), utils.GetRandomAlphabetsOfDigit(5)) | ||
| return ProcessContent(br, tempPath, file.Path) | ||
| } | ||
| } | ||
|
|
||
| // crdAwareFileInterceptor only processes files that contain CRDs | ||
| // This enables automatic CRD discovery without requiring specific directory paths | ||
| func crdAwareFileInterceptor(br *bufio.Writer) walker.FileInterceptor { | ||
| return func(file walker.File) error { | ||
| // Check if the file is a YAML/JSON file that might contain CRDs | ||
| fileName := strings.ToLower(file.Name) | ||
| isYAML := strings.HasSuffix(fileName, ".yaml") || strings.HasSuffix(fileName, ".yml") | ||
| isJSON := strings.HasSuffix(fileName, ".json") | ||
|
|
||
| if !isYAML && !isJSON { | ||
| // Skip non-YAML/JSON files | ||
| return nil | ||
| } | ||
|
|
||
| // Check if the file content contains a CRD | ||
| // Handle both single-document and multi-document YAML files | ||
| content := file.Content | ||
|
|
||
| // For multi-document YAML, split by document separator and check each | ||
| documents := strings.Split(content, "\n---\n") | ||
| // Also handle documents separated by "---" at the start of a line | ||
| if len(documents) == 1 { | ||
| // Try splitting by lines starting with "---" | ||
| lines := strings.Split(content, "\n") | ||
| var docs []string | ||
| var currentDoc strings.Builder | ||
| for _, line := range lines { | ||
| if strings.TrimSpace(line) == "---" && currentDoc.Len() > 0 { | ||
| docs = append(docs, currentDoc.String()) | ||
| currentDoc.Reset() | ||
| } else { | ||
| if currentDoc.Len() > 0 { | ||
| currentDoc.WriteString("\n") | ||
| } | ||
| currentDoc.WriteString(line) | ||
| } | ||
| } | ||
| if currentDoc.Len() > 0 { | ||
| docs = append(docs, currentDoc.String()) | ||
| } | ||
| if len(docs) > 1 { | ||
| documents = docs | ||
| } | ||
| } | ||
|
Comment on lines
+164
to
+188
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The logic for splitting multi-document YAML files is complex and has a few issues:
Consider simplifying this entire block with a more robust method, such as using a regular expression to split documents, which would be cleaner and less error-prone. |
||
|
|
||
| // Check each document for CRD | ||
| hasCRD := false | ||
| for _, doc := range documents { | ||
| doc = strings.TrimSpace(doc) | ||
| if doc == "" { | ||
| continue | ||
| } | ||
| // Check for YAML format | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use libraries, not string parsing.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. okay noted ! |
||
| if match, _ := regexp.MatchString(`kind:\s*CustomResourceDefinition`, doc); match { | ||
| hasCRD = true | ||
| break | ||
| } | ||
| // Check for JSON format | ||
| if match, _ := regexp.MatchString(`"kind"\s*:\s*"CustomResourceDefinition"`, doc); match { | ||
| hasCRD = true | ||
| break | ||
| } | ||
| } | ||
|
|
||
| if !hasCRD { | ||
| // File doesn't contain a CRD, skip it | ||
| return nil | ||
| } | ||
|
|
||
| // File contains a CRD, process it | ||
| tempPath := filepath.Join(os.TempDir(), utils.GetRandomAlphabetsOfDigit(5)) | ||
| return ProcessContent(br, tempPath, file.Path) | ||
| } | ||
| } | ||
|
|
||
| // When passing a directory to extract charts and the format introspector is provided as file/dir interceptor i.e. ConvertToK8sManifest ensure the Recurese is off. It is required othweise we will process the dir as well as process the file in that dir separately. | ||
| // Add more calrifying commment and entry inside docs. | ||
| func dirInterceptor(br *bufio.Writer) walker.DirInterceptor { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please consider reverting these changes since they are not focused on the specific issue and also they have been taken care in another PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure i ll revert the latest commit