Skip to content

Commit 84d9474

Browse files
authored
Use sparse checkout for components (#1701)
* proof of concept for sparse checkout
* fix linter errors
* fix inner include and test
* remove debug log
* use remote.host and remote.port because components should be on the same instance
* cache the parsed components
* refactor local include expansion
* remove utils.remoteFileExist
* workaround for git archive
* delete include temporary directories
* fix linter errors
* fix cp
1 parent 07edf77 commit 84d9474

File tree

4 files changed

+124
-159
lines changed

4 files changed

+124
-159
lines changed

src/parser-includes.ts

Lines changed: 121 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,15 @@ type ParserIncludesInitOptions = {
2323
maximumIncludes: number;
2424
};
2525

26+
type ParsedComponent = {
27+
domain: string;
28+
port: string;
29+
projectPath: string;
30+
name: string;
31+
ref: string;
32+
isLocal: boolean;
33+
};
34+
2635
export class ParserIncludes {
2736
private static count: number = 0;
2837

@@ -53,12 +62,14 @@ export class ParserIncludes {
5362
let includeDatas: any[] = [];
5463
const promises = [];
5564
const {stateDir, cwd, fetchIncludes, gitData, expandVariables} = opts;
65+
// cache the parsed component, because parseIncludeComponent is expensive and we would call it twice otherwise
66+
const componentParseCache = new Map<number, ParsedComponent>();
5667

5768
const include = this.expandInclude(gitlabData?.include, opts.variables);
5869

5970
this.normalizeTriggerInclude(gitlabData, opts);
6071
// Find files to fetch from remote and place in .gitlab-ci-local/includes
61-
for (const value of include) {
72+
for (const [index, value] of include.entries()) {
6273
if (value["rules"]) {
6374
const include_rules = value["rules"];
6475
const rulesResult = Utils.getRulesResult({argv, cwd, rules: include_rules, variables: opts.variables}, gitData);
@@ -76,13 +87,20 @@ export class ParserIncludes {
7687
promises.push(this.downloadIncludeRemote(cwd, stateDir, url, fetchIncludes));
7788
} else if (value["remote"]) {
7889
promises.push(this.downloadIncludeRemote(cwd, stateDir, value["remote"], fetchIncludes));
90+
} else if (value["component"]) {
91+
const component = this.parseIncludeComponent(value["component"], gitData);
92+
componentParseCache.set(index, component);
93+
if (!component.isLocal)
94+
{
95+
promises.push(this.downloadIncludeComponent(cwd, stateDir, component.projectPath, component.ref, component.name, gitData, fetchIncludes));
96+
}
7997
}
8098

8199
}
82100

83101
await Promise.all(promises);
84102

85-
for (const value of include) {
103+
for (const [index, value] of include.entries()) {
86104
if (value["rules"]) {
87105
const include_rules = value["rules"];
88106
const rulesResult = Utils.getRulesResult({argv, cwd, rules: include_rules, variables: opts.variables}, gitData);
@@ -107,73 +125,32 @@ export class ParserIncludes {
107125
, {inputs: value.inputs || {}}
108126
, expandVariables);
109127
// Expand local includes inside a "project"-like include
110-
fileDoc["include"] = this.expandInclude(fileDoc["include"], opts.variables);
111-
fileDoc["include"].forEach((inner: any, i: number) => {
112-
if (!inner["local"]) return;
113-
if (inner["rules"]) {
114-
const rulesResult = Utils.getRulesResult({argv, cwd: opts.cwd, variables: opts.variables, rules: inner["rules"]}, gitData);
115-
if (rulesResult.when === "never") {
116-
return;
117-
}
118-
}
119-
fileDoc["include"][i] = {
120-
project: value["project"],
121-
file: inner["local"].replace(/^\//, ""),
122-
ref: value["ref"],
123-
inputs: inner.inputs || {},
124-
};
125-
});
126-
128+
fileDoc["include"] = this.expandInnerLocalIncludes(fileDoc["include"], value["project"], value["ref"], opts);
127129
includeDatas = includeDatas.concat(await this.init(fileDoc, opts));
128130
}
129131
} else if (value["component"]) {
130-
const {domain, port, projectPath, componentName, ref, isLocalComponent} = this.parseIncludeComponent(value["component"], gitData);
131-
// converts component to project. gitlab allows two different file path ways to include a component
132-
let files = [`${componentName}.yml`, `${componentName}/template.yml`, null];
133-
134-
// If a file is present locally, keep only that one in the files array to avoid downloading the other one that never exists
135-
if (!argv.fetchIncludes) {
136-
for (const f of files) {
137-
const localFileName = `${cwd}/${stateDir}/includes/${gitData.remote.host}/${projectPath}/${ref}/${f}`;
138-
if (fs.existsSync(localFileName)) {
139-
files = [f];
140-
break;
141-
}
142-
}
143-
}
132+
const component = componentParseCache.get(index);
133+
assert(component !== undefined, `Internal error, component parse cache missing entry [${index}]`);
134+
// GitLab allows two different file paths to include a component
135+
const files = [`${component.name}.yml`, `${component.name}/template.yml`];
144136

137+
let file = null;
145138
for (const f of files) {
146-
assert(f !== null, `This GitLab CI configuration is invalid: component: \`${value["component"]}\`. One of the files [${files}] must exist in \`${domain}` +
147-
(port ? `:${port}` : "") + `/${projectPath}\``);
148-
149-
if (isLocalComponent) {
150-
const localComponentInclude = `${cwd}/${f}`;
151-
if (!(await fs.pathExists(localComponentInclude))) {
152-
continue;
153-
}
154-
155-
const content = await Parser.loadYaml(localComponentInclude, {inputs: value.inputs || {}}, expandVariables);
156-
includeDatas = includeDatas.concat(await this.init(content, opts));
157-
break;
158-
} else {
159-
const localFileName = `${cwd}/${stateDir}/includes/${gitData.remote.host}/${projectPath}/${ref}/${f}`;
160-
// Check remotely only if the file does not exist locally
161-
if (!fs.existsSync(localFileName) && !(await Utils.remoteFileExist(cwd, f, ref, domain, projectPath, gitData.remote.schema, gitData.remote.port))) {
162-
continue;
163-
}
164-
165-
const fileDoc = {
166-
include: {
167-
project: projectPath,
168-
file: f,
169-
ref: ref,
170-
inputs: value.inputs || {},
171-
},
172-
};
173-
includeDatas = includeDatas.concat(await this.init(fileDoc, opts));
174-
break;
139+
let searchPath = `${cwd}/${f}`;
140+
if (!component.isLocal) {
141+
searchPath = `${cwd}/${stateDir}/includes/${gitData.remote.host}/${component.projectPath}/${component.ref}/${f}`;
142+
}
143+
if (fs.existsSync(searchPath)) {
144+
file = searchPath;
175145
}
176146
}
147+
assert(file !== null, `This GitLab CI configuration is invalid: component: \`${value["component"]}\`. One of the files [${files}] must exist in \`${component.domain}` +
148+
(component.port ? `:${component.port}` : "") + `/${component.projectPath}\``);
149+
150+
const fileDoc = await Parser.loadYaml(file, {inputs: value.inputs || {}}, expandVariables);
151+
// Expand local includes inside the component file into "project"-like includes
152+
fileDoc["include"] = this.expandInnerLocalIncludes(fileDoc["include"], component.projectPath, component.ref, opts);
153+
includeDatas = includeDatas.concat(await this.init(fileDoc, opts));
177154
} else if (value["template"]) {
178155
const {project, ref, file, domain} = this.covertTemplateToProjectFile(value["template"]);
179156
const fsUrl = Utils.fsUrl(`https://${domain}/${project}/-/raw/${ref}/${file}`);
@@ -238,7 +215,7 @@ export class ParserIncludes {
238215
};
239216
}
240217

241-
static parseIncludeComponent (component: string, gitData: GitData): {domain: string; port: string; projectPath: string; componentName: string; ref: string; isLocalComponent: boolean} {
218+
static parseIncludeComponent (component: string, gitData: GitData): ParsedComponent {
242219
assert(!component.includes("://"), `This GitLab CI configuration is invalid: component: \`${component}\` should not contain protocol`);
243220
const pattern = /(?<domain>[^/:\s]+)(:(?<port>\d+))?\/(?<projectPath>.+)\/(?<componentName>[^@]+)@(?<ref>.+)/; // https://regexr.com/7v7hm
244221
const gitRemoteMatch = pattern.exec(component);
@@ -254,10 +231,10 @@ export class ParserIncludes {
254231
if (ref == "~latest" || semanticVersionRangesPattern.test(ref)) {
255232
// https://docs.gitlab.com/ci/components/#semantic-version-ranges
256233
let stdout;
257-
try {
234+
if (gitData.remote.schema == "git" || gitData.remote.schema == "ssh") {
258235
stdout = Utils.syncSpawn(["git", "ls-remote", "--tags", `git@${domain}:${projectPath}`]).stdout;
259-
} catch {
260-
stdout = Utils.syncSpawn(["git", "ls-remote", "--tags", `https://${domain}:${port ?? 443}/${projectPath}.git`]).stdout;
236+
} else {
237+
stdout = Utils.syncSpawn(["git", "ls-remote", "--tags", `${gitData.remote.schema}://${domain}:${port ?? 443}/${projectPath}.git`]).stdout;
261238
}
262239
assert(stdout);
263240
const tags = stdout
@@ -276,12 +253,34 @@ export class ParserIncludes {
276253
domain: domain,
277254
port: port,
278255
projectPath: projectPath,
279-
componentName: `templates/${gitRemoteMatch.groups["componentName"]}`,
256+
name: `templates/${gitRemoteMatch.groups["componentName"]}`,
280257
ref: ref,
281-
isLocalComponent: isLocalComponent,
258+
isLocal: isLocalComponent,
282259
};
283260
}
284261

262+
// Expand inner local includes into "project"-like includes
263+
static expandInnerLocalIncludes (fileIncludes: any, projectPath: string, ref: string, opts: ParserIncludesInitOptions) {
264+
const {argv} = opts;
265+
const updatedIncludes = this.expandInclude(fileIncludes, opts.variables);
266+
updatedIncludes.forEach((inner: any, i: number) => {
267+
if (!inner["local"]) return;
268+
if (inner["rules"]) {
269+
const rulesResult = Utils.getRulesResult({argv, cwd: opts.cwd, variables: opts.variables, rules: inner["rules"]}, opts.gitData);
270+
if (rulesResult.when === "never") {
271+
return;
272+
}
273+
}
274+
updatedIncludes[i] = {
275+
project: projectPath,
276+
file: inner["local"].replace(/^\//, ""),
277+
ref: ref,
278+
inputs: inner.inputs || {},
279+
};
280+
});
281+
return updatedIncludes;
282+
}
283+
285284
static async downloadIncludeRemote (cwd: string, stateDir: string, url: string, fetchIncludes: boolean): Promise<void> {
286285
const fsUrl = Utils.fsUrl(url);
287286
try {
@@ -301,30 +300,81 @@ export class ParserIncludes {
301300
static async downloadIncludeProjectFile (cwd: string, stateDir: string, project: string, ref: string, file: string, gitData: GitData, fetchIncludes: boolean): Promise<void> {
302301
const remote = gitData.remote;
303302
const normalizedFile = file.replace(/^\/+/, "");
303+
let tmpDir = null;
304304
try {
305305
const target = `${stateDir}/includes/${remote.host}/${project}/${ref}`;
306306
if (await fs.pathExists(`${cwd}/${target}/${normalizedFile}`) && !fetchIncludes) return;
307307

308308
if (remote.schema.startsWith("http")) {
309309
const ext = "tmp-" + Math.random();
310310
await fs.mkdirp(path.dirname(`${cwd}/${target}/${normalizedFile}`));
311+
tmpDir = `${cwd}/${target}.${ext}`;
311312

312313
const gitCloneBranch = (ref === "HEAD") ? "" : `--branch ${ref}`;
313314
await Utils.bashMulti([
314315
`cd ${cwd}/${stateDir}`,
315-
`git clone ${gitCloneBranch} -n --depth=1 --filter=tree:0 ${remote.schema}://${remote.host}:${remote.port}/${project}.git ${cwd}/${target}.${ext}`,
316-
`cd ${cwd}/${target}.${ext}`,
316+
`git clone ${gitCloneBranch} -n --depth=1 --filter=tree:0 ${remote.schema}://${remote.host}:${remote.port}/${project}.git ${tmpDir}`,
317+
`cd ${tmpDir}`,
317318
`git sparse-checkout set --no-cone ${normalizedFile}`,
318319
"git checkout",
319320
`cd ${cwd}/${stateDir}`,
320-
`cp ${cwd}/${target}.${ext}/${normalizedFile} ${cwd}/${target}/${normalizedFile}`,
321+
`cp ${tmpDir}/${normalizedFile} ${cwd}/${target}/${normalizedFile}`,
321322
], cwd);
322323
} else {
323324
await fs.mkdirp(`${cwd}/${target}`);
324325
await Utils.bash(`set -eou pipefail; git archive --remote=ssh://git@${remote.host}:${remote.port}/${project}.git ${ref} ${normalizedFile} | tar -f - -xC ${target}/`, cwd);
325326
}
326327
} catch (e) {
327328
throw new AssertionError({message: `Project include could not be fetched { project: ${project}, ref: ${ref}, file: ${normalizedFile} }\n${e}`});
329+
} finally {
330+
if (tmpDir !== null) {
331+
// always cleanup temporary directory (if created)
332+
await fs.rm(tmpDir, {recursive: true, force: true});
333+
}
334+
}
335+
}
336+
337+
static async downloadIncludeComponent (cwd: string, stateDir: string, project: string, ref: string, componentName: string, gitData: GitData, fetchIncludes: boolean): Promise<void> {
338+
const remote = gitData.remote;
339+
const files = [`${componentName}.yml`, `${componentName}/template.yml`];
340+
let tmpDir = null;
341+
try {
342+
const target = `${stateDir}/includes/${remote.host}/${project}/${ref}`;
343+
344+
if (!fetchIncludes && (await fs.pathExists(`${cwd}/${target}/${files[0]}`) || await fs.pathExists(`${cwd}/${target}/${files[1]}`))) return;
345+
346+
if (remote.schema.startsWith("http")) {
347+
const ext = "tmp-" + Math.random();
348+
await fs.mkdirp(path.dirname(`${cwd}/${target}/templates`));
349+
tmpDir = `${cwd}/${target}.${ext}`;
350+
351+
const gitCloneBranch = (ref === "HEAD") ? "" : `--branch ${ref}`;
352+
await Utils.bashMulti([
353+
`cd ${cwd}/${stateDir}`,
354+
`git clone ${gitCloneBranch} -n --depth=1 --filter=tree:0 ${remote.schema}://${remote.host}:${remote.port}/${project}.git ${tmpDir}`,
355+
`cd ${tmpDir}`,
356+
`git sparse-checkout set --no-cone ${files[0]} ${files[1]}`,
357+
"git checkout",
358+
`cd ${cwd}/${stateDir}`,
359+
`mkdir -p ${tmpDir}/templates`, // create templates subdir (if it doesn't exist), as the check out may not create it
360+
`cp -r ${tmpDir}/templates ${cwd}/${target}`,
361+
], cwd);
362+
} else {
363+
// git archive fails if the paths do not exist, to work around this we use a wildcard "templates/component*.yml"
364+
// this resolves to either "templates/component.yml" or "templates/component/template.yml"
365+
// if both exist "templates/component.yml" will be pulled
366+
// Drawback: also pulls all other .yml files from templates/component/ directory
367+
const componentWildcard = `${componentName}*.yml`;
368+
await fs.mkdirp(`${cwd}/${target}`);
369+
await Utils.bash(`set -eou pipefail; git archive --remote=ssh://git@${remote.host}:${remote.port}/${project}.git ${ref} ${componentWildcard} | tar -f - -xC ${target}/`, cwd);
370+
}
371+
} catch (e) {
372+
throw new AssertionError({message: `Component include could not be fetched { project: ${project}, ref: ${ref}, file: ${files} }\n${e}`});
373+
} finally {
374+
if (tmpDir !== null) {
375+
// always cleanup temporary directory (if created)
376+
await fs.rm(tmpDir, {recursive: true, force: true});
377+
}
328378
}
329379
}
330380

src/utils.ts

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@ import base64url from "base64url";
88
import execa from "execa";
99
import assert from "assert";
1010
import {CICDVariable} from "./variables-from-files.js";
11-
import {GitData, GitSchema} from "./git-data.js";
11+
import {GitData} from "./git-data.js";
1212
import globby from "globby";
1313
import micromatch from "micromatch";
14-
import axios, {AxiosRequestConfig} from "axios";
14+
import {AxiosRequestConfig} from "axios";
1515
import path from "path";
1616
import {Argv} from "./argv.js";
17-
import {MIMEType} from "node:util";
1817

1918
type RuleResultOpt = {
2019
argv: Argv;
@@ -403,41 +402,6 @@ export class Utils {
403402
return Object.getPrototypeOf(v) === Object.prototype;
404403
}
405404

406-
static async remoteFileExist (cwd: string, file: string, ref: string, domain: string, projectPath: string, protocol: GitSchema, port: string) {
407-
switch (protocol) {
408-
case "ssh":
409-
case "git":
410-
try {
411-
await Utils.spawn(`git archive --remote=ssh://git@${domain}:${port}/${projectPath}.git ${ref} ${file}`.split(" "), cwd);
412-
return true;
413-
} catch (e: any) {
414-
if (!e.stderr.includes(`remote: fatal: pathspec '${file}' did not match any files`)) throw new Error(e);
415-
return false;
416-
}
417-
418-
case "http":
419-
case "https": {
420-
try {
421-
const axiosConfig: AxiosRequestConfig = Utils.getAxiosProxyConfig();
422-
const {status, headers} = await axios.get(
423-
`${protocol}://${domain}:${port}/${projectPath}/-/raw/${ref}/${file}`,
424-
axiosConfig,
425-
);
426-
const mimeType = new MIMEType(headers["content-type"]);
427-
return (
428-
status === 200 &&
429-
(mimeType.type === "text" && mimeType.subtype === "plain") // handles scenario where self-hosted gitlab returns statuscode 200 when file does not exist
430-
);
431-
} catch {
432-
return false;
433-
}
434-
}
435-
default: {
436-
Utils.switchStatementExhaustiveCheck(protocol);
437-
}
438-
}
439-
}
440-
441405
static switchStatementExhaustiveCheck (param: never): never {
442406
// https://dev.to/babak/exhaustive-type-checking-with-typescript-4l3f
443407
throw new Error(`Unhandled case ${param}`);

tests/test-cases/include-component/integration.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ test("include-component no component template file (protocol: https)", async ()
2020
expect(true).toBe(false);
2121
} catch (e: any) {
2222
assert(e instanceof AssertionError, `Unexpected error thrown:\n ${e}`);
23-
expect(e.message).toBe("This GitLab CI configuration is invalid: component: `gitlab.com/components/go/potato@0.3.1`. One of the files [templates/potato.yml,templates/potato/template.yml,] must exist in `gitlab.com/components/go`");
23+
expect(e.message).toBe("This GitLab CI configuration is invalid: component: `gitlab.com/components/go/potato@0.3.1`. One of the files [templates/potato.yml,templates/potato/template.yml] must exist in `gitlab.com/components/go`");
2424
}
2525
});
2626

0 commit comments

Comments
 (0)