diff --git a/.env.development b/.env.development index ddb981af6..04a53752b 100644 --- a/.env.development +++ b/.env.development @@ -6,8 +6,6 @@ DATABASE_URL="postgresql://postgres:postgres@localhost:5432/postgres" ZOEKT_WEBSERVER_URL="http://localhost:6070" # The command to use for generating ctags. CTAGS_COMMAND=ctags -# logging, strict -SRC_TENANT_ENFORCEMENT_MODE=strict # Auth.JS # You can generate a new secret with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ff4757af..fd71cf1e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] + + ### Added +- Added support for streaming code search results. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Added buttons to toggle case sensitivity and regex patterns. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) - Added counts to members, requests, and invites tabs in the members settings. [#621](https://github.com/sourcebot-dev/sourcebot/pull/621) +### Changed +- Changed the default search behaviour to match patterns as substrings and **not** regular expressions. Regular expressions can be used by toggling the regex button in the search bar. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Renamed `public` query prefix to `visibility`. Allowed values for `visibility` are `public`, `private`, and `any`. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Changed `archived` query prefix to accept values `yes`, `no`, and `only`. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) + +### Removed +- Removed `case` query prefix. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Removed `branch` and `b` query prefixes. Please use `rev:` instead. [#623](https://github.com/sourcebot-dev/sourcebot/pull/623) +- Removed `regex` query prefix. 
[#623](https://github.com/sourcebot-dev/sourcebot/pull/623) + ### Fixed - Fixed spurious infinite loads with explore panel, file tree, and file search command. [#617](https://github.com/sourcebot-dev/sourcebot/pull/617) - Wipe search context on init if entitlement no longer exists [#618](https://github.com/sourcebot-dev/sourcebot/pull/618) diff --git a/Dockerfile b/Dockerfile index 41c677121..1390914ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,10 +43,12 @@ COPY .yarn ./.yarn COPY ./packages/db ./packages/db COPY ./packages/schemas ./packages/schemas COPY ./packages/shared ./packages/shared +COPY ./packages/queryLanguage ./packages/queryLanguage RUN yarn workspace @sourcebot/db install RUN yarn workspace @sourcebot/schemas install RUN yarn workspace @sourcebot/shared install +RUN yarn workspace @sourcebot/query-language install # ------------------------------------ # ------ Build Web ------ @@ -92,6 +94,7 @@ COPY --from=shared-libs-builder /app/node_modules ./node_modules COPY --from=shared-libs-builder /app/packages/db ./packages/db COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas COPY --from=shared-libs-builder /app/packages/shared ./packages/shared +COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage # Fixes arm64 timeouts RUN yarn workspace @sourcebot/web install @@ -130,6 +133,7 @@ COPY --from=shared-libs-builder /app/node_modules ./node_modules COPY --from=shared-libs-builder /app/packages/db ./packages/db COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas COPY --from=shared-libs-builder /app/packages/shared ./packages/shared +COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage RUN yarn workspace @sourcebot/backend install RUN yarn workspace @sourcebot/backend build @@ -173,7 +177,6 @@ ENV DATA_DIR=/data ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot ENV DATABASE_DATA_DIR=$DATA_CACHE_DIR/db ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis -ENV 
SRC_TENANT_ENFORCEMENT_MODE=strict ENV SOURCEBOT_PUBLIC_KEY_PATH=/app/public.pem # Valid values are: debug, info, warn, error @@ -217,6 +220,9 @@ COPY --from=zoekt-builder \ /cmd/zoekt-index \ /usr/local/bin/ +# Copy zoekt proto files (needed for gRPC client at runtime) +COPY vendor/zoekt/grpc/protos /app/vendor/zoekt/grpc/protos + # Copy all of the things COPY --from=web-builder /app/packages/web/public ./packages/web/public COPY --from=web-builder /app/packages/web/.next/standalone ./ @@ -229,6 +235,7 @@ COPY --from=shared-libs-builder /app/node_modules ./node_modules COPY --from=shared-libs-builder /app/packages/db ./packages/db COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas COPY --from=shared-libs-builder /app/packages/shared ./packages/shared +COPY --from=shared-libs-builder /app/packages/queryLanguage ./packages/queryLanguage # Fixes git "dubious ownership" issues when the volume is mounted with different permissions to the container. RUN git config --global safe.directory "*" diff --git a/docs/docs/features/search/syntax-reference.mdx b/docs/docs/features/search/syntax-reference.mdx index cde52d0e2..f57600024 100644 --- a/docs/docs/features/search/syntax-reference.mdx +++ b/docs/docs/features/search/syntax-reference.mdx @@ -4,32 +4,51 @@ title: Writing search queries Sourcebot uses a powerful regex-based query language that enabled precise code search within large codebases. - ## Syntax reference guide -Queries consist of space-separated regular expressions. Wrapping expressions in `""` combines them. By default, a file must have at least one match for each expression to be included. +Queries consist of space-separated search patterns that are matched against file contents. A file must have at least one match for each expression to be included. Queries can optionally contain search filters to further refine the search results. + +## Keyword search (default) + +Keyword search matches search patterns exactly in file contents. 
Wrapping search patterns in `""` combines them as a single expression. | Example | Explanation | | :--- | :--- | -| `foo` | Match files with regex `/foo/` | -| `foo bar` | Match files with regex `/foo/` **and** `/bar/` | -| `"foo bar"` | Match files with regex `/foo bar/` | +| `foo` | Match files containing the keyword `foo` | +| `foo bar` | Match files containing both `foo` **and** `bar` | +| `"foo bar"` | Match files containing the phrase `foo bar` | +| `"foo \"bar\""` | Match files containing `foo "bar"` exactly (escaped quotes) | + +## Regex search -Multiple expressions can be or'd together with `or`, negated with `-`, or grouped with `()`. +Toggle the regex button (`.*`) in the search bar to interpret search patterns as regular expressions. | Example | Explanation | | :--- | :--- | -| `foo or bar` | Match files with regex `/foo/` **or** `/bar/` | -| `foo -bar` | Match files with regex `/foo/` but **not** `/bar/` | -| `foo (bar or baz)` | Match files with regex `/foo/` **and** either `/bar/` **or** `/baz/` | +| `foo` | Match files with regex `/foo/` | +| `foo.*bar` | Match files with regex `/foo.*bar/` (foo followed by any characters, then bar) | +| `^function\s+\w+` | Match files with regex `/^function\s+\w+/` (function at start of line, followed by whitespace and word characters) | +| `"foo bar"` | Match files with regex `/foo bar/`. Quotes are not matched. | -Expressions can be prefixed with certain keywords to modify search behavior. Some keywords can be negated using the `-` prefix. +## Search filters + +Search queries (keyword or regex) can include multiple search filters to further refine the search results. Some filters can be negated using the `-` prefix. | Prefix | Description | Example | | :--- | :--- | :--- | | `file:` | Filter results from filepaths that match the regex. By default all files are searched. | `file:README` - Filter results to filepaths that match regex `/README/`
`file:"my file"` - Filter results to filepaths that match regex `/my file/`
`-file:test\.ts$` - Ignore results from filepaths that match regex `/test\.ts$/` | -| `repo:` | Filter results from repos that match the regex. By default all repos are searched. | `repo:linux` - Filter results to repos that match regex `/linux/`
`-repo:^web/.*` - Ignore results from repos that match regex `/^web\/.*` | +| `repo:` | Filter results from repos that match the regex. By default all repos are searched. | `repo:linux` - Filter results to repos that match regex `/linux/`
`-repo:^web/.*` - Ignore results from repos that match regex `/^web\/.*/` | | `rev:` | Filter results from a specific branch or tag. By default **only** the default branch is searched. | `rev:beta` - Filter results to branches that match regex `/beta/` | | `lang:` | Filter results by language (as defined by [linguist](https://github.com/github-linguist/linguist/blob/main/lib/linguist/languages.yml)). By default all languages are searched. | `lang:TypeScript` - Filter results to TypeScript files
`-lang:YAML` - Ignore results from YAML files | | `sym:` | Match symbol definitions created by [universal ctags](https://ctags.io/) at index time. | `sym:\bmain\b` - Filter results to symbols that match regex `/\bmain\b/` | -| `context:` | Filter results to a predefined [search context](/docs/features/search/search-contexts). | `context:web` - Filter results to the web context
`-context:pipelines` - Ignore results from the pipelines context | \ No newline at end of file +| `context:` | Filter results to a predefined [search context](/docs/features/search/search-contexts). | `context:web` - Filter results to the web context
`-context:pipelines` - Ignore results from the pipelines context | + +## Boolean operators & grouping + +By default, space-separated expressions are and'd together. The `or` keyword and parentheses `()` can be used to create more complex boolean logic. Parentheses can be negated using the `-` prefix. + +| Example | Explanation | +| :--- | :--- | +| `foo or bar` | Match files containing `foo` **or** `bar` | +| `foo (bar or baz)` | Match files containing `foo` **and** either `bar` **or** `baz`. | +| `-(foo) bar` | Match files containing `bar` **and not** `foo`. | diff --git a/package.json b/package.json index a70bab991..c6621f53e 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio", "dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset", "dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push", - "build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared}' run build" + "build:deps": "yarn workspaces foreach --recursive --topological --from '{@sourcebot/schemas,@sourcebot/db,@sourcebot/shared,@sourcebot/query-language}' run build" }, "devDependencies": { "concurrently": "^9.2.1", @@ -27,6 +27,7 @@ }, "packageManager": "yarn@4.7.0", "resolutions": { - "prettier": "3.5.3" + "prettier": "3.5.3", + "@lezer/common": "1.3.0" } } diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index 6ae361633..2acf72fd9 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -94,7 +94,6 @@ const listenToShutdownSignals = () => { const cleanup = async (signal: string) => { try { if (receivedSignal) { - logger.debug(`Recieved repeat signal ${signal}, ignoring.`); return; } receivedSignal = true; diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts index e7cb75540..245206d9d 100644 --- 
a/packages/db/src/index.ts +++ b/packages/db/src/index.ts @@ -1 +1,3 @@ +import type { User, Account } from ".prisma/client"; +export type UserWithAccounts = User & { accounts: Account[] }; export * from ".prisma/client"; \ No newline at end of file diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index 06f9e2a4a..e6332a354 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- Updated API client to match the latest Sourcebot release. [#555](https://github.com/sourcebot-dev/sourcebot/pull/555) + ## [1.0.9] - 2025-11-17 ### Added diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 3e4750a72..a8d178894 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -70,16 +70,12 @@ server.tool( query += ` ( lang:${languages.join(' or lang:')} )`; } - if (caseSensitive) { - query += ` case:yes`; - } else { - query += ` case:no`; - } - const response = await search({ query, matches: env.DEFAULT_MATCHES, contextLines: env.DEFAULT_CONTEXT_LINES, + isRegexEnabled: true, + isCaseSensitivityEnabled: caseSensitive, }); if (isServiceError(response)) { diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index ba46b2f15..bab83a0d6 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -21,15 +21,17 @@ export const symbolSchema = z.object({ kind: z.string(), }); +export const searchOptionsSchema = z.object({ + matches: z.number(), // The number of matches to return. + contextLines: z.number().optional(), // The number of context lines to return. + whole: z.boolean().optional(), // Whether to return the whole file as part of the response. + isRegexEnabled: z.boolean().optional(), // Whether to enable regular expression search. + isCaseSensitivityEnabled: z.boolean().optional(), // Whether to enable case sensitivity. 
+}); + export const searchRequestSchema = z.object({ - // The zoekt query to execute. - query: z.string(), - // The number of matches to return. - matches: z.number(), - // The number of context lines to return. - contextLines: z.number().optional(), - // Whether to return the whole file as part of the response. - whole: z.boolean().optional(), + query: z.string(), // The zoekt query to execute. + ...searchOptionsSchema.shape, }); export const repositoryInfoSchema = z.object({ @@ -109,7 +111,7 @@ export const searchStatsSchema = z.object({ regexpsConsidered: z.number(), // FlushReason explains why results were flushed. - flushReason: z.number(), + flushReason: z.string(), }); export const searchResponseSchema = z.object({ @@ -139,7 +141,6 @@ export const searchResponseSchema = z.object({ content: z.string().optional(), })), repositoryInfo: z.array(repositoryInfoSchema), - isBranchFilteringEnabled: z.boolean(), isSearchExhaustive: z.boolean(), }); diff --git a/packages/queryLanguage/.gitignore b/packages/queryLanguage/.gitignore new file mode 100644 index 000000000..81d9910ba --- /dev/null +++ b/packages/queryLanguage/.gitignore @@ -0,0 +1,2 @@ +/node_modules/ +/dist diff --git a/packages/queryLanguage/package.json b/packages/queryLanguage/package.json new file mode 100644 index 000000000..b4ef8f427 --- /dev/null +++ b/packages/queryLanguage/package.json @@ -0,0 +1,20 @@ +{ + "name": "@sourcebot/query-language", + "private": true, + "main": "dist/index.js", + "scripts": { + "build": "lezer-generator src/query.grammar -o src/parser --typeScript --names && tsc", + "test": "vitest", + "postinstall": "yarn build" + }, + "devDependencies": { + "@lezer/generator": "^1.8.0", + "tsx": "^4.19.1", + "typescript": "^5.7.3", + "vitest": "^2.1.9" + }, + "dependencies": { + "@lezer/common": "^1.3.0", + "@lezer/lr": "^1.4.3" + } +} diff --git a/packages/queryLanguage/src/index.ts b/packages/queryLanguage/src/index.ts new file mode 100644 index 000000000..00cfbaade --- /dev/null 
+++ b/packages/queryLanguage/src/index.ts @@ -0,0 +1,7 @@ +import { parser } from "./parser"; + +type Tree = ReturnType<typeof parser.parse>; +type SyntaxNode = Tree['topNode']; +export type { Tree, SyntaxNode }; +export * from "./parser"; +export * from "./parser.terms"; \ No newline at end of file diff --git a/packages/queryLanguage/src/parser.terms.ts b/packages/queryLanguage/src/parser.terms.ts new file mode 100644 index 000000000..1682bb043 --- /dev/null +++ b/packages/queryLanguage/src/parser.terms.ts @@ -0,0 +1,21 @@ +// This file was generated by lezer-generator. You probably shouldn't edit it. +export const + negate = 22, + Program = 1, + OrExpr = 2, + AndExpr = 3, + NegateExpr = 4, + PrefixExpr = 5, + ArchivedExpr = 6, + RevisionExpr = 7, + ContentExpr = 8, + ContextExpr = 9, + FileExpr = 10, + ForkExpr = 11, + VisibilityExpr = 12, + RepoExpr = 13, + LangExpr = 14, + SymExpr = 15, + RepoSetExpr = 16, + ParenExpr = 17, + Term = 18 diff --git a/packages/queryLanguage/src/parser.ts b/packages/queryLanguage/src/parser.ts new file mode 100644 index 000000000..fb867c4f9 --- /dev/null +++ b/packages/queryLanguage/src/parser.ts @@ -0,0 +1,18 @@ +// This file was generated by lezer-generator. You probably shouldn't edit it. 
+import {LRParser} from "@lezer/lr" +import {negateToken} from "./tokens" +export const parser = LRParser.deserialize({ + version: 14, + states: "'[OVQROOO!WQQO'#CcO!WQQO'#CdO!WQQO'#CeO!WQQO'#CfO!`QSO'#CgO!kQSO'#ChO!WQQO'#CiO!WQQO'#CjO!WQQO'#CkO!WQQO'#ClOOQP'#Ca'#CaOVQRO'#CmO!vQQO'#C`OOQP'#Cn'#CnOOQP'#Cw'#CwO#nQRO'#CvO#{QQO'#CvO$WQQO'#C^OOQO'#Cu'#CuQOQQOOO!`QSO'#CbOOQP'#C}'#C}OOQP,58},58}OOQP,59O,59OOOQP,59P,59POOQP,59Q,59QOOQP'#DU'#DUOOQP,59R,59ROOQP'#DW'#DWOOQP,59S,59SOOQP,59T,59TOOQP,59U,59UOOQP,59V,59VOOQP,59W,59WO$]QQO,59XOOQP,58z,58zOOQP'#Co'#CoO$bQRO,58yOVQRO'#CpO$oQQO,58xOOQP,58|,58|OOQP1G.s1G.sOOQP-E6m-E6mO$zQRO'#CvOOQO'#Cv'#CvOOQO,59[,59[OOQO-E6n-E6n", + stateData: "%i~OhOS~Of]OleOpPOr^Os^OtQOuROvSOwTOyUO!OVO!PWO!QXO!RYO!S[O~OrfOsfO~OmkOnkOokO~O{mO|mO}mO~OleOpPOtQOuROvSOwTOyUO!OVO!PWO!QXO!RYO!S[O~OeiX!UjX!TiX~PVOeiX!UjX!TiX~O!UwO~O!TzO~OeRa!URa!TRa~PVO!UwOeQa!TQa~OejX!UjX!TjX~PVOrlptuvwy!O!P!Q!R!Usy~", + goto: "$`{PP|!Q!X!a!l!l!l!l!l!l!l!l!l!l!l!a!X!u!|PPPP#S#Y#aPPPPP#mPPPPPP$VP$]TcO[SaO[R}w]_O[`vw|[_O[`vw|Rt]_ZO[]`vw|Sv`|R{vQxbR!PxQdORs[SbO[R!OwS`O[Uu`v|R|wQgPQhQQiRQjSQoVQpWQqXRrYQlTRyeRnU", + nodeNames: "⚠ Program OrExpr AndExpr NegateExpr PrefixExpr ArchivedExpr RevisionExpr ContentExpr ContextExpr FileExpr ForkExpr VisibilityExpr RepoExpr LangExpr SymExpr RepoSetExpr ParenExpr Term", + maxTerm: 52, + skippedNodes: [0], + repeatNodeCount: 2, + tokenData: 
"!Hc~RpOX#VXY$TYZ$TZp#Vpq$Tqr#Vrs$csx#Vxy&Vyz&[z#T#V#T#U&a#U#V#V#V#W0q#W#Y#V#Y#Z:{#Z#`#V#`#aCZ#a#b#V#b#cGu#c#dIm#d#e!!V#e#f#V#f#g!-k#g#h!7q#h#j#V#j#k!;_#k#m#V#m#n!Em#n;'S#V;'S;=`#}<%lO#VP#[ZsPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#}<%lO#VP$QP;=`<%l#V~$YRh~XY$TYZ$Tpq$T~$fWOY$cZr$crs%Os#O$c#O#P%T#P;'S$c;'S;=`&P<%lO$c~%TOr~~%WRO;'S$c;'S;=`%a;=`O$c~%dXOY$cZr$crs%Os#O$c#O#P%T#P;'S$c;'S;=`&P;=`<%l$c<%lO$c~&SP;=`<%l$c~&[O!S~~&aO!T~~&f_sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#b#V#b#c'e#c#f#V#f#g)]#g;'S#V;'S;=`#}<%lO#VR'j]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#m#V#m#n(c#n;'S#V;'S;=`#}<%lO#VR(jZ}QsPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#}<%lO#V~)b]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#V#V#V#W*Z#W;'S#V;'S;=`#}<%lO#V~*`]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#[#V#[#]+X#];'S#V;'S;=`#}<%lO#V~+^]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#]#V#]#^,V#^;'S#V;'S;=`#}<%lO#V~,[]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#j#V#j#k-T#k;'S#V;'S;=`#}<%lO#V~-Y]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#X#V#X#Y.R#Y;'S#V;'S;=`#}<%lO#V~.W]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#W#V#W#X/P#X;'S#V;'S;=`#}<%lO#V~/UZsPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]/w!];'S#V;'S;=`#}<%lO#V~0OZl~sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#}<%lO#V~0v]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]1o!]#c#V#c#d2i#d;'S#V;'S;=`#}<%lO#V~1vZt~sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#}<%lO#V~2n]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#b#V#b#c3g#c;'S#V;'S;=`#}<%lO#V~3l]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i4e#i;'S#V;'S;=`#}<%lO#V~4j]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#X#V#X#Y5c#Y;'S#V;'S;=`#}<%lO#V~5h_sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#b#V#b#c6g#c#l#V#l#m8]#m;'S#V;'S;=`#}<%lO#V~6l]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i7e#i;'S#V;'S;=`#}<%lO#V~7jZsPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]1o!];'S#V;'S;=`#}<%lO#V~8b]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i9Z#i;'S#V;'S;=`#}<%lO#V~9`ZsPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]:R!];'S#V;'S;=`#}<%lO#V~:YZu~
sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#}<%lO#V~;Q_sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]u#Y;'S#V;'S;=`#}<%lO#V~>zZsPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]X#^;'S#V;'S;=`#}<%lO#V~!>^]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#U#V#U#V!?V#V;'S#V;'S;=`#}<%lO#V~!?[]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#]#V#]#^!@T#^;'S#V;'S;=`#}<%lO#V~!@Y]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#`#V#`#a!AR#a;'S#V;'S;=`#}<%lO#V~!AW]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#]#V#]#^!BP#^;'S#V;'S;=`#}<%lO#V~!BU]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#h#V#h#i!B}#i;'S#V;'S;=`#}<%lO#V~!CS]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#m#V#m#n!C{#n;'S#V;'S;=`#}<%lO#V~!DQZsPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]!Ds!];'S#V;'S;=`#}<%lO#V~!DzZy~sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#}<%lO#VR!Er]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#X#V#X#Y!Fk#Y;'S#V;'S;=`#}<%lO#VR!Fp]sPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!]#g#V#g#h!Gi#h;'S#V;'S;=`#}<%lO#VR!GpZmQsPOX#VZp#Vqr#Vsx#Vz}#V}!O#V!O![#V![!]#V!];'S#V;'S;=`#}<%lO#V", + tokenizers: [negateToken, 0, 1], + topRules: {"Program":[0,1]}, + tokenPrec: 193, + termNames: {"0":"⚠","1":"@top","2":"OrExpr","3":"AndExpr","4":"NegateExpr","5":"PrefixExpr","6":"ArchivedExpr","7":"RevisionExpr","8":"ContentExpr","9":"ContextExpr","10":"FileExpr","11":"ForkExpr","12":"VisibilityExpr","13":"RepoExpr","14":"LangExpr","15":"SymExpr","16":"RepoSetExpr","17":"ParenExpr","18":"Term","19":"expr+","20":"(or andExpr)+","21":"␄","22":"negate","23":"%mainskip","24":"space","25":"query","26":"andExpr","27":"expr","28":"archivedKw","29":"\"yes\"","30":"\"no\"","31":"\"only\"","32":"revisionKw","33":"value","34":"quotedString","35":"word","36":"contentKw","37":"contextKw","38":"fileKw","39":"forkKw","40":"forkValue","41":"visibilityKw","42":"visibilityValue","43":"\"public\"","44":"\"private\"","45":"\"any\"","46":"repoKw","47":"langKw","48":"symKw","49":"reposetKw","50":"\"(\"","51":"\")\"","52":"or"} +}) diff --git a/packages/queryLanguage/src/query.grammar 
b/packages/queryLanguage/src/query.grammar new file mode 100644 index 000000000..66c0ee83f --- /dev/null +++ b/packages/queryLanguage/src/query.grammar @@ -0,0 +1,102 @@ +@external tokens negateToken from "./tokens" { negate } + +@top Program { query } + +@precedence { + negate, + and, + or @left +} + +query { + OrExpr | + AndExpr | + expr +} + +OrExpr { andExpr (or andExpr)+ } + +AndExpr { expr expr+ } + +andExpr { AndExpr | expr } + +expr { + NegateExpr | + ParenExpr | + PrefixExpr | + Term +} + +NegateExpr { !negate negate (PrefixExpr | ParenExpr) } + +ParenExpr { "(" query ")" } + +PrefixExpr { + ArchivedExpr | + RevisionExpr | + ContentExpr | + ContextExpr | + FileExpr | + ForkExpr | + VisibilityExpr | + RepoExpr | + LangExpr | + SymExpr | + RepoSetExpr +} + +RevisionExpr { revisionKw value } +ContentExpr { contentKw value } +ContextExpr { contextKw value } +FileExpr { fileKw value } +RepoExpr { repoKw value } +LangExpr { langKw value } +SymExpr { symKw value } +RepoSetExpr { reposetKw value } + +// Modifiers +ArchivedExpr { archivedKw archivedValue } +ForkExpr { forkKw forkValue } +VisibilityExpr { visibilityKw visibilityValue } + +archivedValue { "yes" | "no" | "only" } +forkValue { "yes" | "no" | "only" } +visibilityValue { "public" | "private" | "any" } + +Term { quotedString | word } + +value { quotedString | word } + +@skip { space } + +@tokens { + archivedKw { "archived:" } + revisionKw { "rev:" } + contentKw { "content:" | "c:" } + contextKw { "context:" } + fileKw { "file:" | "f:" } + forkKw { "fork:" } + visibilityKw { "visibility:" } + repoKw { "repo:" | "r:" } + langKw { "lang:" } + symKw { "sym:" } + reposetKw { "reposet:" } + + or { "or" ![a-zA-Z0-9_] } + + quotedString { '"' (!["\\\n] | "\\" _)* '"' } + + // Allow almost anything in a word except spaces, parens, quotes + // Colons and dashes are allowed anywhere in words (including at the start) + word { (![ \t\n()"]) (![ \t\n()":] | ":" | "-")* } + + space { $[ \t\n]+ } + + @precedence { + 
quotedString, + archivedKw, revisionKw, contentKw, contextKw, fileKw, + forkKw, visibilityKw, repoKw, langKw, + symKw, reposetKw, or, + word + } +} \ No newline at end of file diff --git a/packages/queryLanguage/src/tokens.ts b/packages/queryLanguage/src/tokens.ts new file mode 100644 index 000000000..15a02525c --- /dev/null +++ b/packages/queryLanguage/src/tokens.ts @@ -0,0 +1,59 @@ +import { ExternalTokenizer } from "@lezer/lr"; +import { negate } from "./parser.terms"; + +// External tokenizer for negation +// Only tokenizes `-` as negate when followed by a prefix keyword or `(` +export const negateToken = new ExternalTokenizer((input) => { + if (input.next !== 45 /* '-' */) return; // Not a dash + + const startPos = input.pos; + + // Look ahead to see what follows the dash + input.advance(); + + // Skip whitespace + let ch = input.next; + while (ch === 32 || ch === 9 || ch === 10) { + input.advance(); + ch = input.next; + } + + // Check if followed by opening paren + if (ch === 40 /* '(' */) { + input.acceptToken(negate, -input.pos + startPos + 1); // Accept just the dash + return; + } + + // Check if followed by a prefix keyword (by checking for keyword followed by colon) + // Look ahead until we hit a delimiter or colon + const checkPos = input.pos; + let foundColon = false; + + // Look ahead until we hit a delimiter or colon + while (ch >= 0) { + if (ch === 58 /* ':' */) { + foundColon = true; + break; + } + // Hit a delimiter (whitespace, paren, or quote) - not a prefix keyword + if (ch === 32 || ch === 9 || ch === 10 || ch === 40 || ch === 41 || ch === 34) { + break; + } + input.advance(); + ch = input.next; + } + + // Reset position + while (input.pos > checkPos) { + input.advance(-1); + } + + if (foundColon) { + // It's a prefix keyword, accept as negate + input.acceptToken(negate, -input.pos + startPos + 1); + return; + } + + // Otherwise, don't tokenize as negate (let word handle it) +}); + diff --git a/packages/queryLanguage/test/basic.txt 
b/packages/queryLanguage/test/basic.txt new file mode 100644 index 000000000..de8bb93bd --- /dev/null +++ b/packages/queryLanguage/test/basic.txt @@ -0,0 +1,72 @@ +# Single term + +hello + +==> + +Program(Term) + +# Multiple terms + +hello world + +==> + +Program(AndExpr(Term,Term)) + +# Multiple terms with various characters + +console.log error_handler + +==> + +Program(AndExpr(Term,Term)) + +# Term with underscores + +my_variable_name + +==> + +Program(Term) + +# Term with dots + +com.example.package + +==> + +Program(Term) + +# Term with numbers + +func123 test_456 + +==> + +Program(AndExpr(Term,Term)) + +# Regex pattern + +[a-z]+ + +==> + +Program(Term) + +# Wildcard pattern + +test.* + +==> + +Program(Term) + +# Multiple regex patterns + +\w+ [0-9]+ \s* + +==> + +Program(AndExpr(Term,Term,Term)) + diff --git a/packages/queryLanguage/test/grammar.test.ts b/packages/queryLanguage/test/grammar.test.ts new file mode 100644 index 000000000..a02862856 --- /dev/null +++ b/packages/queryLanguage/test/grammar.test.ts @@ -0,0 +1,21 @@ +import { parser } from "../src/parser"; +import { fileTests } from "@lezer/generator/dist/test"; +import { describe, it } from "vitest"; +import { fileURLToPath } from "url" +import * as fs from "fs"; +import * as path from "path"; + +const caseDir = path.dirname(fileURLToPath(import.meta.url)) + +for (const file of fs.readdirSync(caseDir)) { + if (!/\.txt$/.test(file)) { + continue; + } + + let name = /^[^\.]*/.exec(file)?.[0]; + describe(name ?? 
"unknown", () => { + for (const { name, run } of fileTests(fs.readFileSync(path.join(caseDir, file), "utf8"), file)) { + it(name, () => run(parser)); + } + }); +} \ No newline at end of file diff --git a/packages/queryLanguage/test/grouping.txt b/packages/queryLanguage/test/grouping.txt new file mode 100644 index 000000000..e8c7798eb --- /dev/null +++ b/packages/queryLanguage/test/grouping.txt @@ -0,0 +1,120 @@ +# Empty parentheses + +() + +==> + +Program(ParenExpr(Term(⚠))) + +# Simple grouping + +(test) + +==> + +Program(ParenExpr(Term)) + +# Multiple terms in group + +(hello world) + +==> + +Program(ParenExpr(AndExpr(Term,Term))) + +# Nested parentheses + +((test)) + +==> + +Program(ParenExpr(ParenExpr(Term))) + +# Multiple groups + +(first) (second) + +==> + +Program(AndExpr(ParenExpr(Term),ParenExpr(Term))) + +# Group with multiple terms + +(one two three) + +==> + +Program(ParenExpr(AndExpr(Term,Term,Term))) + +# Mixed grouped and ungrouped + +test (grouped) another + +==> + +Program(AndExpr(Term,ParenExpr(Term),Term)) + +# Deeply nested + +(((nested))) + +==> + +Program(ParenExpr(ParenExpr(ParenExpr(Term)))) + +# Multiple nested groups + +((a b) (c d)) + +==> + +Program(ParenExpr(AndExpr(ParenExpr(AndExpr(Term,Term)),ParenExpr(AndExpr(Term,Term))))) + +# Group at start + +(start) middle end + +==> + +Program(AndExpr(ParenExpr(Term),Term,Term)) + +# Group at end + +start middle (end) + +==> + +Program(AndExpr(Term,Term,ParenExpr(Term))) + +# Complex grouping pattern + +(a (b c) d) + +==> + +Program(ParenExpr(AndExpr(Term,ParenExpr(AndExpr(Term,Term)),Term))) + +# Sequential groups + +(a)(b)(c) + +==> + +Program(AndExpr(ParenExpr(Term),ParenExpr(Term),ParenExpr(Term))) + +# Group with regex + +([a-z]+) + +==> + +Program(ParenExpr(Term)) + +# Group with dots + +(com.example.test) + +==> + +Program(ParenExpr(Term)) + diff --git a/packages/queryLanguage/test/negation.txt b/packages/queryLanguage/test/negation.txt new file mode 100644 index 000000000..716da1157 
--- /dev/null +++ b/packages/queryLanguage/test/negation.txt @@ -0,0 +1,255 @@ +# Literal dash term + +-test + +==> + +Program(Term) + +# Quoted dash term + +"-excluded" + +==> + +Program(Term) + +# Dash in middle + +test-case + +==> + +Program(Term) + +# Multiple dash terms + +-one -two -three + +==> + +Program(AndExpr(Term,Term,Term)) + +# Negate file prefix + +-file:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Negate repo prefix + +-repo:archived + +==> + +Program(NegateExpr(PrefixExpr(RepoExpr))) + +# Negate lang prefix + +-lang:python + +==> + +Program(NegateExpr(PrefixExpr(LangExpr))) + +# Negate content prefix + +-content:TODO + +==> + +Program(NegateExpr(PrefixExpr(ContentExpr))) + +# Negate revision prefix + +-rev:develop + +==> + +Program(NegateExpr(PrefixExpr(RevisionExpr))) + +# Negate archived prefix + +-archived:yes + +==> + +Program(NegateExpr(PrefixExpr(ArchivedExpr))) + +# Negate fork prefix + +-fork:yes + +==> + +Program(NegateExpr(PrefixExpr(ForkExpr))) + +# Negate visibility prefix + +-visibility:any + +==> + +Program(NegateExpr(PrefixExpr(VisibilityExpr))) + +# Negate context prefix + +-context:backend + +==> + +Program(NegateExpr(PrefixExpr(ContextExpr))) + +# Negate symbol prefix + +-sym:OldClass + +==> + +Program(NegateExpr(PrefixExpr(SymExpr))) + +# Negate parentheses + +-(test) + +==> + +Program(NegateExpr(ParenExpr(Term))) + +# Negate group with multiple terms + +-(test exclude) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(Term,Term)))) + +# Negate group with prefix + +-(file:test.js console.log) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),Term)))) + +# Prefix with negated term + +file:test.js -console + +==> + +Program(AndExpr(PrefixExpr(FileExpr),Term)) + +# Multiple prefixes with negation + +file:test.js -lang:python + +==> + +Program(AndExpr(PrefixExpr(FileExpr),NegateExpr(PrefixExpr(LangExpr)))) + +# Complex negation pattern + +function -file:test.js -lang:java + +==> + 
+Program(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr)))) + +# Negation inside parentheses + +(-file:test.js) + +==> + +Program(ParenExpr(NegateExpr(PrefixExpr(FileExpr)))) + +# Multiple negations in group + +(-file:a.js -lang:python) + +==> + +Program(ParenExpr(AndExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(LangExpr))))) + +# Mixed in parentheses + +(include -file:test.js) + +==> + +Program(ParenExpr(AndExpr(Term,NegateExpr(PrefixExpr(FileExpr))))) + +# Negate nested group + +-((file:test.js)) + +==> + +Program(NegateExpr(ParenExpr(ParenExpr(PrefixExpr(FileExpr))))) + +# Negate short form prefix + +-f:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Negate short form repo + +-r:myrepo + +==> + +Program(NegateExpr(PrefixExpr(RepoExpr))) + +# Negate short form content + +-c:console + +==> + +Program(NegateExpr(PrefixExpr(ContentExpr))) + +# Negate with prefix in quotes + +-file:"test file.js" + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) + +# Complex with multiple negated prefixes + +lang:typescript -file:*.test.ts -file:*.spec.ts + +==> + +Program(AndExpr(PrefixExpr(LangExpr),NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)))) + +# Negated group with prefix + +-(file:test.js lang:python) + +==> + +Program(NegateExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))))) + +# Negate empty group + +-() + +==> + +Program(NegateExpr(ParenExpr(Term(⚠)))) + +# Negate with space after dash + +- file:test.js + +==> + +Program(NegateExpr(PrefixExpr(FileExpr))) diff --git a/packages/queryLanguage/test/operators.txt b/packages/queryLanguage/test/operators.txt new file mode 100644 index 000000000..0ff1f6d82 --- /dev/null +++ b/packages/queryLanguage/test/operators.txt @@ -0,0 +1,271 @@ +# Simple OR + +test or example + +==> + +Program(OrExpr(Term,Term)) + +# Multiple OR + +one or two or three + +==> + +Program(OrExpr(Term,Term,Term)) + +# OR with prefixes + +file:test.js or 
file:example.js + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr))) + +# OR with negation + +test or -file:excluded.js + +==> + +Program(OrExpr(Term,NegateExpr(PrefixExpr(FileExpr)))) + +# OR with quoted strings + +"first option" or "second option" + +==> + +Program(OrExpr(Term,Term)) + +# OR with different prefixes + +lang:python or lang:javascript + +==> + +Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))) + +# Multiple terms with OR + +function test or class example + +==> + +Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term))) + +# OR in parentheses + +(test or example) + +==> + +Program(ParenExpr(OrExpr(Term,Term))) + +# OR with parentheses outside + +(test) or (example) + +==> + +Program(OrExpr(ParenExpr(Term),ParenExpr(Term))) + +# Complex OR with grouping + +(file:*.js lang:javascript) or (file:*.ts lang:typescript) + +==> + +Program(OrExpr(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))),ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr))))) + +# OR with mixed content + +test or file:example.js + +==> + +Program(OrExpr(Term,PrefixExpr(FileExpr))) + +# Prefix OR term + +file:test.js or example + +==> + +Program(OrExpr(PrefixExpr(FileExpr),Term)) + +# OR with short form prefixes + +f:test.js or r:myrepo + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# OR with repo prefixes + +repo:project1 or repo:project2 + +==> + +Program(OrExpr(PrefixExpr(RepoExpr),PrefixExpr(RepoExpr))) + +# OR with revision prefixes + +rev:main or rev:develop + +==> + +Program(OrExpr(PrefixExpr(RevisionExpr),PrefixExpr(RevisionExpr))) + +# OR with lang prefixes + +lang:rust or lang:go + +==> + +Program(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))) + +# OR with content + +content:TODO or content:FIXME + +==> + +Program(OrExpr(PrefixExpr(ContentExpr),PrefixExpr(ContentExpr))) + +# OR with negated terms + +-file:test.js or -file:spec.js + +==> + 
+Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)))) + +# OR in nested parentheses + +((a or b) or (c or d)) + +==> + +Program(ParenExpr(OrExpr(ParenExpr(OrExpr(Term,Term)),ParenExpr(OrExpr(Term,Term))))) + +# Parenthesized OR groups joined by 'and' (and becomes term) + +(a or b) and (c or d) + +==> + +Program(AndExpr(ParenExpr(OrExpr(Term,Term)),Term,ParenExpr(OrExpr(Term,Term)))) + +# OR with wildcards + +*.test.js or *.spec.js + +==> + +Program(OrExpr(Term,Term)) + +# OR with regex patterns + +[a-z]+ or [0-9]+ + +==> + +Program(OrExpr(Term,Term)) + +# OR with dots + +com.example.test or org.example.test + +==> + +Program(OrExpr(Term,Term)) + +# OR with dashes + +test-one or test-two + +==> + +Program(OrExpr(Term,Term)) + +# Word containing 'or' + +order + +==> + +Program(Term) + +# Word containing 'or' in middle + +before + +==> + +Program(Term) + +# OR at start + +or test + +==> + +Program(⚠,Term) + +# OR at end (or becomes term) + +test or + +==> + +Program(AndExpr(Term,Term)) + +# Multiple consecutive OR + +test or or example + +==> + +Program(OrExpr(Term,⚠,Term)) + +# OR with all prefix types + +file:*.js or repo:myrepo or lang:javascript + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr),PrefixExpr(LangExpr))) + +# Complex query with OR and negation + +(lang:python or lang:ruby) -file:test.py + +==> + +Program(AndExpr(ParenExpr(OrExpr(PrefixExpr(LangExpr),PrefixExpr(LangExpr))),NegateExpr(PrefixExpr(FileExpr)))) + +# OR with quoted prefix values + +file:"test one.js" or file:"test two.js" + +==> + +Program(OrExpr(PrefixExpr(FileExpr),PrefixExpr(FileExpr))) + +# OR with empty parentheses + +() or () + +==> + +Program(OrExpr(ParenExpr(Term(⚠)),ParenExpr(Term(⚠)))) + +# OR with negated groups + +-(file:a.js) or -(file:b.js) + +==> + +Program(OrExpr(NegateExpr(ParenExpr(PrefixExpr(FileExpr))),NegateExpr(ParenExpr(PrefixExpr(FileExpr))))) diff --git a/packages/queryLanguage/test/precedence.txt 
b/packages/queryLanguage/test/precedence.txt new file mode 100644 index 000000000..d43e5b346 --- /dev/null +++ b/packages/queryLanguage/test/precedence.txt @@ -0,0 +1,200 @@ +# OR has lowest precedence - implicit AND groups first + +a b or c d + +==> + +Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term))) + +# Multiple OR operators flatten into a single OrExpr + +a or b or c + +==> + +Program(OrExpr(Term,Term,Term)) + +# AND before OR + +file:test.js error or file:test.go panic + +==> + +Program(OrExpr(AndExpr(PrefixExpr(FileExpr),Term),AndExpr(PrefixExpr(FileExpr),Term))) + +# Negation binds tighter than AND + +-file:test.js error + +==> + +Program(AndExpr(NegateExpr(PrefixExpr(FileExpr)),Term)) + +# Negation binds tighter than OR + +-file:a.js or file:b.js + +==> + +Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(FileExpr))) + +# Parentheses override precedence + +(a or b) c + +==> + +Program(AndExpr(ParenExpr(OrExpr(Term,Term)),Term)) + +# Parentheses override - OR inside parens groups first + +a (b or c) + +==> + +Program(AndExpr(Term,ParenExpr(OrExpr(Term,Term)))) + +# Complex: AND, OR, and a literal dash term + +a -b or c d + +==> + +Program(OrExpr(AndExpr(Term,Term),AndExpr(Term,Term))) + +# Negated group in OR expression + +-(a b) or c + +==> + +Program(OrExpr(NegateExpr(ParenExpr(AndExpr(Term,Term))),Term)) + +# Multiple negations in OR + +-file:a.js or -file:b.js or file:c.js + +==> + +Program(OrExpr(NegateExpr(PrefixExpr(FileExpr)),NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(FileExpr))) + +# Prefix binds to its value only + +file:a.js b.js + +==> + +Program(AndExpr(PrefixExpr(FileExpr),Term)) + +# OR with prefixes and terms mixed + +repo:backend error or repo:frontend warning + +==> + +Program(OrExpr(AndExpr(PrefixExpr(RepoExpr),Term),AndExpr(PrefixExpr(RepoExpr),Term))) + +# Nested parentheses with OR + +((a or b) c) or d + +==> + +Program(OrExpr(ParenExpr(AndExpr(ParenExpr(OrExpr(Term,Term)),Term)),Term)) + +# OR at different nesting levels + +(a or (b or 
c)) + +==> + +Program(ParenExpr(OrExpr(Term,ParenExpr(OrExpr(Term,Term))))) + +# Implicit AND groups all adjacent terms before OR + +a b c or d e f + +==> + +Program(OrExpr(AndExpr(Term,Term,Term),AndExpr(Term,Term,Term))) + +# Mixed prefix and regular terms with OR + +lang:go func or lang:rust fn + +==> + +Program(OrExpr(AndExpr(PrefixExpr(LangExpr),Term),AndExpr(PrefixExpr(LangExpr),Term))) + +# Literal dash term doesn't affect OR grouping + +a or -b or c + +==> + +Program(OrExpr(Term,Term,Term)) + +# Parentheses can isolate OR from surrounding AND + +a (b or c) d + +==> + +Program(AndExpr(Term,ParenExpr(OrExpr(Term,Term)),Term)) + +# Multiple parenthesized groups with AND + +(a or b) (c or d) + +==> + +Program(AndExpr(ParenExpr(OrExpr(Term,Term)),ParenExpr(OrExpr(Term,Term)))) + +# Quoted strings are atomic - no precedence inside + +"a or b" + +==> + +Program(Term) + +# Prefix with OR value doesn't split + +file:"a.js or b.js" + +==> + +Program(PrefixExpr(FileExpr)) + +# Negated prefix in complex expression + +-file:test.js lang:go error or warning + +==> + +Program(OrExpr(AndExpr(NegateExpr(PrefixExpr(FileExpr)),PrefixExpr(LangExpr),Term),Term)) + +# OR followed by parenthesized AND + +a or (b c) + +==> + +Program(OrExpr(Term,ParenExpr(AndExpr(Term,Term)))) + +# Empty parens don't affect precedence + +() or a b + +==> + +Program(OrExpr(ParenExpr(Term(⚠)),AndExpr(Term,Term))) + +# Negation of empty group + +-() a + +==> + +Program(AndExpr(NegateExpr(ParenExpr(Term(⚠))),Term)) + diff --git a/packages/queryLanguage/test/prefixes.txt b/packages/queryLanguage/test/prefixes.txt new file mode 100644 index 000000000..00533ec03 --- /dev/null +++ b/packages/queryLanguage/test/prefixes.txt @@ -0,0 +1,336 @@ +# File prefix + +file:README.md + +==> + +Program(PrefixExpr(FileExpr)) + +# File prefix short form + +f:index.ts + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo prefix + +repo:myproject + +==> + +Program(PrefixExpr(RepoExpr)) + +# Repo prefix short form + 
+r:github.com/user/repo + +==> + +Program(PrefixExpr(RepoExpr)) + +# Content prefix + +content:function + +==> + +Program(PrefixExpr(ContentExpr)) + +# Content prefix short form + +c:console.log + +==> + +Program(PrefixExpr(ContentExpr)) + +# Revision prefix + +rev:main + +==> + +Program(PrefixExpr(RevisionExpr)) + +# Lang prefix + +lang:typescript + +==> + +Program(PrefixExpr(LangExpr)) + +# Archived prefix - no + +archived:no + +==> + +Program(PrefixExpr(ArchivedExpr)) + +# Archived prefix - only + +archived:only + +==> + +Program(PrefixExpr(ArchivedExpr)) + +# Fork prefix - yes + +fork:yes + +==> + +Program(PrefixExpr(ForkExpr)) + +# Fork prefix - only + +fork:only + +==> + +Program(PrefixExpr(ForkExpr)) + +# Visibility prefix - public + +visibility:public + +==> + +Program(PrefixExpr(VisibilityExpr)) + +# Context prefix + +context:web + +==> + +Program(PrefixExpr(ContextExpr)) + +# Symbol prefix + +sym:MyClass + +==> + +Program(PrefixExpr(SymExpr)) + +# RepoSet prefix + +reposet:repo1,repo2 + +==> + +Program(PrefixExpr(RepoSetExpr)) + +# File with wildcard + +file:*.ts + +==> + +Program(PrefixExpr(FileExpr)) + +# File with path + +file:src/components/Button.tsx + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo with full URL + +repo:github.com/org/project + +==> + +Program(PrefixExpr(RepoExpr)) + +# Multiple prefixes + +file:test.js repo:myproject + +==> + +Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# Prefix with term + +file:test.js console.log + +==> + +Program(AndExpr(PrefixExpr(FileExpr),Term)) + +# Term then prefix + +console.log file:handler.ts + +==> + +Program(AndExpr(Term,PrefixExpr(FileExpr))) + +# Multiple prefixes and terms + +lang:typescript function file:handler.ts + +==> + +Program(AndExpr(PrefixExpr(LangExpr),Term,PrefixExpr(FileExpr))) + +# Prefix with regex pattern + +file:[a-z]+\.test\.js + +==> + +Program(PrefixExpr(FileExpr)) + +# Content with single-word value (no quotes) + +content:hello + +==> + 
+Program(PrefixExpr(ContentExpr)) + +# Revision with slashes + +rev:feature/new-feature + +==> + +Program(PrefixExpr(RevisionExpr)) + +# RepoSet with multiple repos + +reposet:repo1,repo2,repo3 + +==> + +Program(PrefixExpr(RepoSetExpr)) + +# Symbol with dots + +sym:package.Class.method + +==> + +Program(PrefixExpr(SymExpr)) + +# Lang with various languages + +lang:python + +==> + +Program(PrefixExpr(LangExpr)) + +# Archived prefix - yes + +archived:yes + +==> + +Program(PrefixExpr(ArchivedExpr)) + +# Archived prefix - invalid value (error case) + +archived:invalid + +==> + +Program(AndExpr(PrefixExpr(ArchivedExpr(⚠)),Term)) + +# Fork prefix - no + +fork:no + +==> + +Program(PrefixExpr(ForkExpr)) + +# Fork prefix - invalid value (error case) + +fork:invalid + +==> + +Program(AndExpr(PrefixExpr(ForkExpr(⚠)),Term)) + +# Visibility prefix - private + +visibility:private + +==> + +Program(PrefixExpr(VisibilityExpr)) + +# Visibility prefix - any + +visibility:any + +==> + +Program(PrefixExpr(VisibilityExpr)) + +# Visibility prefix - invalid value (error case) + +visibility:invalid + +==> + +Program(AndExpr(PrefixExpr(VisibilityExpr(⚠)),Term)) + +# File with dashes + +file:my-component.tsx + +==> + +Program(PrefixExpr(FileExpr)) + +# Repo with numbers + +repo:project123 + +==> + +Program(PrefixExpr(RepoExpr)) + +# Content with special chars + +content:@Component + +==> + +Program(PrefixExpr(ContentExpr)) + +# Context with underscores + +context:data_engineering + +==> + +Program(PrefixExpr(ContextExpr)) + +# Prefix in parentheses + +(file:test.js) + +==> + +Program(ParenExpr(PrefixExpr(FileExpr))) + +# Multiple prefixes in group + +(file:*.ts lang:typescript) + +==> + +Program(ParenExpr(AndExpr(PrefixExpr(FileExpr),PrefixExpr(LangExpr)))) + diff --git a/packages/queryLanguage/test/quoted.txt b/packages/queryLanguage/test/quoted.txt new file mode 100644 index 000000000..088ed0d3b --- /dev/null +++ b/packages/queryLanguage/test/quoted.txt @@ -0,0 +1,479 @@ +# Simple quoted 
string + +"hello" + +==> + +Program(Term) + +# Quoted string with spaces + +"hello world" + +==> + +Program(Term) + +# Multiple words in quotes + +"this is a search term" + +==> + +Program(Term) + +# Quoted string with escaped quote + +"hello \"world\"" + +==> + +Program(Term) + +# Quoted string with escaped backslash + +"path\\to\\file" + +==> + +Program(Term) + +# Double backslash + +"test\\\\path" + +==> + +Program(Term) + +# Multiple escaped quotes + +"\"quoted\" \"words\"" + +==> + +Program(Term) + +# Mixed escaped characters + +"test\\nvalue\"quoted" + +==> + +Program(Term) + +# Empty quoted string + +"" + +==> + +Program(Term) + +# Quoted string with only spaces + +" " + +==> + +Program(Term) + +# Quoted string in file prefix + +file:"my file.txt" + +==> + +Program(PrefixExpr(FileExpr)) + +# Quoted string in repo prefix + +repo:"github.com/user/repo name" + +==> + +Program(PrefixExpr(RepoExpr)) + +# Quoted string in content prefix + +content:"console.log" + +==> + +Program(PrefixExpr(ContentExpr)) + +# Quoted string in revision prefix + +rev:"feature/my feature" + +==> + +Program(PrefixExpr(RevisionExpr)) + +# Multiple quoted strings + +"first string" "second string" + +==> + +Program(AndExpr(Term,Term)) + +# Quoted and unquoted mixed + +unquoted "quoted string" another + +==> + +Program(AndExpr(Term,Term,Term)) + +# Quoted string with parentheses inside + +"(test)" + +==> + +Program(Term) + +# Quoted string with brackets + +"[a-z]+" + +==> + +Program(Term) + +# Quoted string with special chars + +"test@example.com" + +==> + +Program(Term) + +# Quoted string with colons + +"key:value" + +==> + +Program(Term) + +# Quoted string with dashes + +"test-case-example" + +==> + +Program(Term) + +# Quoted string with dots + +"com.example.package" + +==> + +Program(Term) + +# Quoted string with regex pattern + +"\\w+\\s*=\\s*\\d+" + +==> + +Program(Term) + +# Quoted string with forward slashes + +"path/to/file" + +==> + +Program(Term) + +# Quoted string with 
underscores + +"my_variable_name" + +==> + +Program(Term) + +# Quoted string with numbers + +"test123" + +==> + +Program(Term) + +# Quoted string with mixed case + +"CamelCaseTest" + +==> + +Program(Term) + +# Quoted prefix value with spaces + +file:"test file.js" + +==> + +Program(PrefixExpr(FileExpr)) + +# Multiple prefixes with quoted values + +file:"my file.txt" repo:"my repo" + +==> + +Program(AndExpr(PrefixExpr(FileExpr),PrefixExpr(RepoExpr))) + +# Quoted string in parentheses + +("quoted term") + +==> + +Program(ParenExpr(Term)) + +# Multiple quoted in parentheses + +("first" "second") + +==> + +Program(ParenExpr(AndExpr(Term,Term))) + +# Quoted with escaped newline + +"line1\\nline2" + +==> + +Program(Term) + +# Quoted with tab character + +"value\\ttab" + +==> + +Program(Term) + +# Lang prefix with quoted value + +lang:"objective-c" + +==> + +Program(PrefixExpr(LangExpr)) + +# Sym prefix with quoted value + +sym:"My Class" + +==> + +Program(PrefixExpr(SymExpr)) + +# Content with quoted phrase + +content:"TODO: fix this" + +==> + +Program(PrefixExpr(ContentExpr)) + +# Quoted string with at symbol + +"@decorator" + +==> + +Program(Term) + +# Quoted string with hash + +"#define" + +==> + +Program(Term) + +# Quoted string with dollar sign + +"$variable" + +==> + +Program(Term) + +# Quoted string with percent + +"100%" + +==> + +Program(Term) + +# Quoted string with ampersand + +"foo&bar" + +==> + +Program(Term) + +# Quoted string with asterisk + +"test*" + +==> + +Program(Term) + +# Quoted string with plus + +"a+b" + +==> + +Program(Term) + +# Quoted string with equals + +"a=b" + +==> + +Program(Term) + +# Quoted string with angle brackets + +"