diff --git a/.vscode/settings.json b/.vscode/settings.json index 25fa6215..ed23d529 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,4 @@ { - "typescript.tsdk": "node_modules/typescript/lib" + "typescript.tsdk": "node_modules/typescript/lib", + "cSpell.words": ["venv"] } diff --git a/applications/browser/package.json b/applications/browser/package.json index 65d652bc..a0aa47f8 100644 --- a/applications/browser/package.json +++ b/applications/browser/package.json @@ -22,7 +22,8 @@ "@scribe/theia-utils": "0.0.1", "@theia/git": "1.51.0", "@theia/scm": "1.51.0", - "scribe-usfm-editor": "0.0.0" + "scribe-usfm-editor": "0.0.0", + "scribe-python": "0.1.0" }, "devDependencies": { "@theia/cli": "1.51.0" diff --git a/packages/python/package.json b/packages/python/package.json new file mode 100644 index 00000000..72972cb7 --- /dev/null +++ b/packages/python/package.json @@ -0,0 +1,51 @@ +{ + "name": "scribe-python", + "version": "0.1.0", + "keywords": [ + "theia-extension" + ], + "scripts": { + "build:style": "npx tailwindcss -i ./src/browser/styles/globals.css -o ./lib/browser/output-tailwind.css -c ./tailwind.config.js", + "build:ts": "tsc", + "build": "yarn build:ts && yarn build:style", + "production": "yarn build", + "watch": "npm-watch build", + "clean": "rimraf lib node_modules *.tsbuildinfo", + "productionCICD": "yarn build" + }, + "dependencies": { + "@theia/core": "1.51.0", + "axios": "^1.6.2", + "extract-zip": "^2.0.1", + "fs-extra": "^11.2.0", + "inversify": "^6.0.1", + "tar": "^7.4.3" + }, + "devDependencies": { + "rimraf": "^5.0.5", + "tailwindcss": "^3.4.1", + "tailwindcss-animate": "^1.0.7", + "postcss": "^8.4.35", + "autoprefixer": "^10.4.17", + "npm-watch": "^0.13.0" + }, + "files": [ + "lib", + "src" + ], + "theiaExtensions": [ + { + "backend": "lib/node/python-backend-module", + "frontend": "lib/browser/python-frontend-module" + } + ], + "watch": { + "build": { + "patterns": [ + "src/**/*" + ], + "extensions": 
"ts,html,scss,css,tsx,json", + "quiet": false + } + } +} diff --git a/packages/python/postcss.config.js b/packages/python/postcss.config.js new file mode 100644 index 00000000..12a703d9 --- /dev/null +++ b/packages/python/postcss.config.js @@ -0,0 +1,6 @@ +module.exports = { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +}; diff --git a/packages/python/src/browser/components/TextAnalysis.tsx b/packages/python/src/browser/components/TextAnalysis.tsx new file mode 100644 index 00000000..3ed963e7 --- /dev/null +++ b/packages/python/src/browser/components/TextAnalysis.tsx @@ -0,0 +1,248 @@ +import React from "react"; +import { useState } from "react"; +import { ChevronDown, ChevronRight } from "lucide-react"; + +export interface AnalysisData { + n_lines: number; + n_characters: number; + "letter-script": Record; + "number-script": Record; + "other-script": Record; + "non-canonical": Record< + string, + { + orig: string; + norm: string; + "orig-count": number; + "norm-count": number; + "orig-form": string; + "norm-form": string; + changes: string[]; + } + >; + "char-conflict": Record; + "notable-token": Record< + string, + Record + >; + pattern: Record< + string, + Record + >; + block: Record< + string, + Record< + string, + { + char: string; + id: string; + name: string; + count: number; + ex: [string, number][]; + } + > + >; +} + +const CollapsibleSection: React.FC<{ + title: string; + children: React.ReactNode; +}> = ({ title, children }) => { + const [isOpen, setIsOpen] = useState(false); + + return ( +
+ + {isOpen &&
{children}
} +
+ ); +}; + +export const TextAnalysis: React.FC<{ data: AnalysisData }> = ({ data }) => { + return ( +
+

+ Text Analysis Results +

+ +
+
+

+ Number of Lines +

+

{data.n_lines}

+
+
+

+ Number of Characters +

+

{data.n_characters}

+
+
+ + + {Object.entries(data["letter-script"]).map(([script, info]) => ( +
+

+ {script} +

+

Count: {info.count}

+ {info.ex && ( +

Example: {info.ex}

+ )} +
+ ))} +
+ + + {Object.entries(data["number-script"]).map(([script, info]) => ( +
+

+ {script} +

+

Count: {info.count}

+

Example: {info.ex}

+
+ ))} +
+ + + {Object.entries(data["other-script"]).map(([script, info]) => ( +
+

+ {script} +

+

Count: {info.count}

+

Example: {info.ex}

+
+ ))} +
+ + + {Object.entries(data["non-canonical"]).map(([char, info]) => ( +
+

+ {char} +

+

+ Original: {info.orig} (Count: {info["orig-count"]}) +

+

+ Normalized: {info.norm} (Count: {info["norm-count"]}) +

+

+ Changes: {info.changes.join(", ")} +

+
+ ))} +
+ + + {Object.entries(data["notable-token"]).map(([category, tokens]) => ( +
+

+ {category} +

+ {Object.entries(tokens).map(([token, info]) => ( +
+

+ Token: {info.token} +

+

+ Count: {info.count} +

+

+ Examples:{" "} + {info.ex + .map(([ex, line]) => `${ex} (line ${line})`) + .join(", ")} +

+
+ ))} +
+ ))} +
+ + + {Object.entries(data.pattern).map(([category, patterns]) => ( +
+

+ {category} +

+ {Object.entries(patterns).map(([pattern, info]) => ( +
+

+ Pattern: {info.pattern} +

+

+ Count: {info.count} +

+

+ Examples:{" "} + {info.ex + .map(([ex, line]) => `${ex} (line ${line})`) + .join(", ")} +

+
+ ))} +
+ ))} +
+ + + {Object.entries(data.block).map(([blockName, characters]) => ( +
+

+ {blockName} +

+ {Object.entries(characters).map(([char, info]) => ( +
+

+ Character: {info.char} ({info.id}) +

+

Name: {info.name}

+

+ Count: {info.count} +

+

+ Examples:{" "} + {info.ex + .map(([ex, line]) => `${ex} (line ${line})`) + .join(", ")} +

+
+ ))} +
+ ))} +
+
+ ); +}; diff --git a/packages/python/src/browser/python-contribution.ts b/packages/python/src/browser/python-contribution.ts new file mode 100644 index 00000000..87ed3f9c --- /dev/null +++ b/packages/python/src/browser/python-contribution.ts @@ -0,0 +1,82 @@ +import { injectable, inject } from "@theia/core/shared/inversify"; +import { + Command, + CommandContribution, + CommandRegistry, + MessageService, +} from "@theia/core/lib/common"; +import { PythonService } from "../common/python-protocol"; + +const PLACEHOLDER_TEST_TEXT_WILDEBEEST = ` + This is a problematic text file designed to contain multiple issues for testing purposes: +1. UTF-8 encoding violations: ����� (these should not appear properly). +2. Control characters:  embedded within text. +3. Zero-width characters: word​separator and ‍joiner. +4. Mixed letters/numbers/punctuation/letter-modifiers from various scripts: + - Arabic: العربية. + - Greek: αβγ. + - Cyrillic: АБВ. + - Latin: aͯ́b. + - Hebrew: אבג. +5. Tokens with letters from different scripts: αБاא. +6. XML tokens: Invalid & "special" . +7. Tokens with certain punctuation: foo!bar?baz,hello;world. +8. Orphan letter modifiers: áḅ̂. +9. Non-canonical character combinations: áá (á should be normalized to á). 
+ `;
+
+/**
+ * Command contribution that exposes the Python backend service through
+ * three commands:
+ *
+ * - `scribe.python.setup`: calls `PythonService.setupEnvironment()`.
+ * - `scribe.python.setup-wildebeest`: calls `PythonService.setupWildebeest()`.
+ * - `scribe.python.execute-wildebeest`: sends the built-in placeholder text
+ *   to `PythonService.executeWildebeest()`; the returned value is discarded.
+ *
+ * Every handler reports failures to the user through `MessageService`
+ * instead of letting the rejection escape the command handler.
+ */
+@injectable()
+export class PythonSetupContribution implements CommandContribution {
+  static readonly SETUP_ENV: Command = {
+    id: "scribe.python.setup",
+    label: "Python: Setup Python Environment",
+  };
+
+  static readonly SETUP_WILDEBEEST: Command = {
+    id: "scribe.python.setup-wildebeest",
+    label: "Python: Setup Wildebeest",
+  };
+
+  static readonly EXECUTE_WILDEBEEST: Command = {
+    id: "scribe.python.execute-wildebeest",
+    label: "Python: Execute Wildebeest",
+  };
+
+  @inject(MessageService)
+  private readonly messageService: MessageService;
+
+  @inject(PythonService)
+  private readonly pythonService: PythonService;
+
+  /**
+   * Turns an arbitrary thrown/rejected value into printable text.
+   *
+   * A caught value is not guaranteed to be an `Error`, so reading
+   * `.message` unconditionally can yield `undefined`.
+   *
+   * @param error the value received by a `catch` clause.
+   * @returns `error.message` for `Error` instances, `String(error)` otherwise.
+   */
+  private static describeError(error: unknown): string {
+    return error instanceof Error ? error.message : String(error);
+  }
+
+  /**
+   * Registers the three Python commands on the given registry.
+   *
+   * @param registry the command registry to register the handlers with.
+   */
+  registerCommands(registry: CommandRegistry): void {
+    registry.registerCommand(PythonSetupContribution.SETUP_ENV, {
+      execute: async () => {
+        this.messageService.info("Starting Python environment setup...");
+        try {
+          await this.pythonService.setupEnvironment();
+          this.messageService.info("Python environment setup completed!");
+        } catch (error) {
+          this.messageService.error(
+            `Setup failed: ${PythonSetupContribution.describeError(error)}`
+          );
+        }
+      },
+    });
+
+    registry.registerCommand(PythonSetupContribution.SETUP_WILDEBEEST, {
+      execute: async () => {
+        this.messageService.info("Starting Wildebeest setup...");
+        try {
+          await this.pythonService.setupWildebeest();
+          this.messageService.info("Wildebeest setup completed!");
+        } catch (error) {
+          this.messageService.error(
+            `Setup failed: ${PythonSetupContribution.describeError(error)}`
+          );
+        }
+      },
+    });
+
+    registry.registerCommand(PythonSetupContribution.EXECUTE_WILDEBEEST, {
+      execute: async () => {
+        // Same failure reporting as the setup commands, so a backend error
+        // is shown to the user rather than ending as an unhandled rejection.
+        try {
+          await this.pythonService.executeWildebeest(PLACEHOLDER_TEST_TEXT_WILDEBEEST);
+        } catch (error) {
+          this.messageService.error(
+            `Wildebeest execution failed: ${PythonSetupContribution.describeError(error)}`
+          );
+        }
+      },
+    });
+  }
+}
diff --git a/packages/python/src/browser/python-frontend-module.ts b/packages/python/src/browser/python-frontend-module.ts
new file mode 100644
index 00000000..bfb637c6
--- /dev/null
+++ b/packages/python/src/browser/python-frontend-module.ts
@@ -0,0 +1,39 @@
+import { ContainerModule } from "@theia/core/shared/inversify";
+import { CommandContribution } from
"@theia/core/lib/common"; +import { ServiceConnectionProvider } from "@theia/core/lib/browser/messaging/service-connection-provider"; +import { PythonSetupContribution } from "./python-contribution"; +import { PythonService, PythonServicePath } from "../common/python-protocol"; +import { + FrontendApplicationContribution, + WidgetFactory, +} from "@theia/core/lib/browser"; +import { + ChecksWidget, + ChecksWidgetContribution, +} from "./widgets/checks-widget"; +import { bindViewContribution } from "@theia/core/lib/browser"; + +import "../../lib/browser/output-tailwind.css"; + +export default new ContainerModule((bind) => { + bind(CommandContribution).to(PythonSetupContribution); + + bind(PythonService) + .toDynamicValue((ctx) => { + const connection = ctx.container.get( + ServiceConnectionProvider + ); + return connection.createProxy(PythonServicePath); + }) + .inSingletonScope(); + + bindViewContribution(bind, ChecksWidgetContribution); + bind(FrontendApplicationContribution).toService(ChecksWidgetContribution); + bind(ChecksWidget).toSelf(); + bind(WidgetFactory) + .toDynamicValue((ctx) => ({ + id: ChecksWidget.ID, + createWidget: () => ctx.container.get(ChecksWidget), + })) + .inSingletonScope(); +}); diff --git a/packages/python/src/browser/styles/globals.css b/packages/python/src/browser/styles/globals.css new file mode 100644 index 00000000..b5c61c95 --- /dev/null +++ b/packages/python/src/browser/styles/globals.css @@ -0,0 +1,3 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; diff --git a/packages/python/src/browser/widgets/checks-widget.tsx b/packages/python/src/browser/widgets/checks-widget.tsx new file mode 100644 index 00000000..e5745b80 --- /dev/null +++ b/packages/python/src/browser/widgets/checks-widget.tsx @@ -0,0 +1,150 @@ +import * as React from "@theia/core/shared/react"; +import { + inject, + injectable, + postConstruct, +} from "@theia/core/shared/inversify"; +import { ReactWidget } from 
"@theia/core/lib/browser/widgets/react-widget"; +import { + AbstractViewContribution, + FrontendApplicationContribution, + FrontendApplication, + codicon, +} from "@theia/core/lib/browser"; +import { FrontendApplicationStateService } from "@theia/core/lib/browser/frontend-application-state"; +import { WorkspaceService } from "@theia/workspace/lib/browser"; +import { AnalysisData, TextAnalysis } from "../components/TextAnalysis"; +import { PythonService } from "../../common/python-protocol"; + +const PLACEHOLDER_TEST_TEXT = ` + This is a problematic text file designed to contain multiple issues for testing purposes: +1. UTF-8 encoding violations: (these should not appear properly). +2. Control characters:  embedded within text. +3. Zero-width characters: word​separator and ‍joiner. +4. Mixed letters/numbers/punctuation/letter-modifiers from various scripts: + - Arabic: العربية. + - Greek: αβγ. +`; + +@injectable() +export class ChecksWidget extends ReactWidget { + static readonly ID = "python-checks-widget"; + static readonly LABEL = "Checks"; + + private analysisData: AnalysisData | null = null; + + @inject(PythonService) + private pythonService: PythonService; + + @postConstruct() + protected init(): void { + this.doInit(); + } + + protected async doInit(): Promise { + this.id = ChecksWidget.ID; + this.title.label = ChecksWidget.LABEL; + this.title.caption = ChecksWidget.LABEL; + this.title.closable = true; + this.title.iconClass = codicon("checklist"); + this.update(); + } + + protected async getAnalysisData(): Promise { + const data = await this.pythonService.executeWildebeest(PLACEHOLDER_TEST_TEXT); + return JSON.parse(data); + } + + render(): React.ReactNode { + return ( +
+ { + this.pythonService.executeWildebeest(text).then((data) => { + this.analysisData = JSON.parse(data); + this.update(); + }); + }} + /> + {this.analysisData && } +
+ ); + } +} + +@injectable() +export class ChecksWidgetContribution + extends AbstractViewContribution + implements FrontendApplicationContribution +{ + @inject(FrontendApplicationStateService) + protected readonly stateService: FrontendApplicationStateService; + + @inject(WorkspaceService) + protected readonly workspaceService: WorkspaceService; + + constructor() { + super({ + widgetId: ChecksWidget.ID, + widgetName: ChecksWidget.LABEL, + defaultWidgetOptions: { + area: "right", + }, + }); + } + + async onStart(app: FrontendApplication): Promise { + this.stateService.reachedState("ready").then(() => { + this.openView({ + activate: true, + reveal: true, + }); + }); + } +} + +interface CheckInputFormProps { + onSubmit?: (text: string) => void; + placeholder?: string; + className?: string; +} + +const CheckInputForm: React.FC = ({ + onSubmit, + placeholder = "Enter text to analyze...", + className, +}) => { + const [inputText, setInputText] = React.useState(""); + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + if (inputText.trim() && onSubmit) { + onSubmit(inputText); + } + }; + + return ( +
+ +