microsoft · pelikhan · Jul 1, 2025 · Jun 2, 2025 · Jun 4, 2025 · Jun 4, 2025
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -1,5 +1,5 @@
 {
-    "image": "mcr.microsoft.com/devcontainers/javascript-node:20",
+    "image": "mcr.microsoft.com/devcontainers/javascript-node:22",
     "features": {
         "ghcr.io/devcontainers/features/common-utils:2": {},
         "ghcr.io/devcontainers/features/git:1": {},

diff --git a/.env.ollama b/.env.ollama
@@ -0,0 +1,18 @@
+GENAISCRIPT_MODEL_LARGE="ollama:llama3.3"
+GENAISCRIPT_MODEL_SMALL="ollama:qwen2.5:3b"
+GENAISCRIPT_MODEL_TINY="ollama:llama3.2:1b"
+GENAISCRIPT_MODEL_VISION="azure:gpt-4o_2024-11-20"
+GENAISCRIPT_MODEL_VISION_SMALL="azure:gpt-4o-mini_2024-11-20"
+GENAISCRIPT_MODEL_REASONING="azure:o1_2024-12-17"
+GENAISCRIPT_MODEL_REASONING_SMALL="azure:o3-mini_2025-01-31"
+GENAISCRIPT_MODEL_IMAGE="azure:dall-e-3_30"
+GENAISCRIPT_MODEL_TRANSCRIPTION="azure:whisper_001"
+GENAISCRIPT_MODEL_EMBEDDINGS="azure:text-embedding-ada-002_2"
+GENAISCRIPT_MODEL_EVAL1="azure:gpt-4o_2024-11-20"
+GENAISCRIPT_MODEL_EVAL="ollama:llama3.3"
+GENAISCRIPT_MODEL_RULES="ollama:llama3.3"
+GENAISCRIPT_MODEL_RULES1="ollama:llama3.3"
+GENAISCRIPT_MODEL_BASELINE="ollama:llama3.3"
+
+# run this command once a day to log in
+# az login --scope api://trapi/.default
diff --git a/.github.env b/.github.env
@@ -1,5 +1,5 @@
-GENAISCRIPT_MODEL_LARGE="github:gpt-4o"
-GENAISCRIPT_MODEL_EVAL="github:gpt-4o"
-GENAISCRIPT_MODEL_RULES="github:gpt-4o"
-GENAISCRIPT_MODEL_BASELINE="github:gpt-4o"
-PROMPTPEX_MODELS="github:gpt-4o-mini;github:phi-4-mini-instruct"
+GENAISCRIPT_MODEL_LARGE="github:openai/gpt-4o"
+GENAISCRIPT_MODEL_EVAL="github:openai/gpt-4o"
+GENAISCRIPT_MODEL_RULES="github:openai/gpt-4o"
+GENAISCRIPT_MODEL_BASELINE="github:openai/gpt-4o"
+PROMPTPEX_MODELS="github:openai/gpt-4o-mini;github:microsoft/phi-4-mini-instruct"
diff --git a/.github/instructions/implementation.instructions.md b/.github/instructions/implementation.instructions.md
@@ -0,0 +1,81 @@
+# Implementation Plan
+
+Hi there fellow Agent,
+
+The goal of this page is to help you implement the PromptPex framework in your codebase.
+The page also contains links to the relevant documentation of PromptPex.
+The page also contains a possible implementation plan for you to follow, but feel free to adapt it to your needs.
+
+Your task is to implement the PromptPex framework in your codebase according to the requirements of the user. Follow the user's instructions to choose which language/framework to use and how to implement the framework in your codebase.
+
+## IMPORTANT
+
+When implementing PromptPex, it is very important to reuse the existing prompts as is. Do not change the prompts unless you need to adapt the syntax for your template engine.
+
+We strongly recommend keeping the prompts in separate files and loading them
+at runtime, rather than hardcoding them in your codebase.
+
+Whenever possible, use the original `.prompty` files from the `src/prompts` directory.
+
+## Phase 1: Test Generation
+
+PromptPex is a test generation framework for prompts. It is made of a graph of LLM transformations that eventually generate a set of
+inputs and expected outputs for a given prompt.
+
+- The core of the framework is documented in [Test Generation](docs/src/content/docs/reference/test-generation.md).
+
+## Phase 2: Validate Test Generation
+
+Once you have implemented the test generation, you should validate it on a prompt.
+
+- generate examples of prompty files (you can use this example as a prompt test case)
+
+```markdown
+---
+tags:
+    - unlisted
+---
+
+system:
+You need to categorize a joke as funny or not.
+Respond with "funny" or "not funny".
+
+user:
+{{joke}}
+```
+
+- for each prompty file, create a test case (using the test framework of your choice) that checks that your implementation is able to run and generate test cases.
+
+- run the test cases and fix the code until all tests pass.
+
+## Phase 3: Additional features
+
+You will find additional features for promptpex generation that can be useful to add once the basic test generation is working.
+
+- [Groundtruth](docs/src/content/docs/reference/groundtruth.md): how to generate groundtruth for prompts.
+- [Test Samples](docs/src/content/docs/reference/test-samples.md): how to integrate existing test samples into the test generation process.
+- [Test Expansion](docs/src/content/docs/reference/test-expansion.md): how to morph tests into more complex longer texts.
+- [Scenarios](docs/src/content/docs/reference/scenarios.md): how to receive a custom set of input instructions from users and use them to guide the generation of tests.
+
+### Notes
+
+You can assume that the secrets are already set in the environment or in a `.env` file
+that can be loaded using a library.
+
+## Reference
+
+You can read the following pages to understand the PromptPex framework and how to use it in your codebase:
+
+- [Glossary](docs/src/content/docs/reference/glossary.md): A glossary of terms used in the PromptPex framework.
+- [Test Generation](docs/src/content/docs/reference/test-generation.md): The core of the framework, how to generate tests for prompts.
+- The prompts are `.prompty` files in the [prompts directory](src/prompts).
+- The **.prompty** format is documented in [Prompt Format](docs/src/content/docs/reference/prompt-format.md).
+
+## Reference implementation
+
+The GenAIScript reference implementation is in the `/src/genaiscript` directory. PromptPex starts in `src/genaiscript/src/promptpex.mts`.
+
+It is implemented using [GenAIScript](https://microsoft.github.io/genaiscript/).
+
+**Follow the patterns and habits of the target framework/language you are generating.**
+The reference implementation is a good starting point but you should adapt it to the target framework/language you are generating.
diff --git a/.github/workflows/action.yml b/.github/workflows/action.yml
@@ -0,0 +1,55 @@
+name: Action Continuous Integration
+on:
+    workflow_dispatch: # can be started manually
+    push:
+        branches:
+            - dev # push-triggered runs are limited to the dev branch
+permissions:
+    contents: read
+    models: read
+jobs:
+    test: # installs dependencies and builds the project
+        runs-on: ubuntu-latest
+        steps:
+            - uses: actions/checkout@v4
+            - uses: actions/setup-node@v4
+              with:
+                  node-version: "22"
+                  cache: npm
+            # Cache the generated model requests made by GenAIScript
+            #
+            # A new cache is created for each run to ensure that the latest model requests are used,
+            # but previous caches can be restored and reused if available.
+            - uses: actions/cache@v4
+              with:
+                  path: .genaiscript/cache/**
+                  key: genaiscript-${{ github.workflow }}-${{ github.run_id }}
+                  restore-keys: |
+                      genaiscript-
+            - run: npm ci
+            - run: npm run build
+    test-action: # runs the action from this repository against an inline prompt
+        needs: test # starts only after the test job has succeeded
+        runs-on: ubuntu-latest
+        steps:
+            - uses: actions/checkout@v4
+            # Cache the generated model requests made by GenAIScript
+            # NOTE(review): this key is identical to the one used by the test job above; confirm both jobs are meant to share a single cache entry.
+            # A new cache is created for each run to ensure that the latest model requests are used,
+            # but previous caches can be restored and reused if available.
+            - uses: actions/cache@v4
+              with:
+                  path: .genaiscript/cache/**
+                  key: genaiscript-${{ github.workflow }}-${{ github.run_id }}
+                  restore-keys: |
+                      genaiscript-
+            - uses: ./ # the action defined at the root of the checked-out repository
+              with:
+                  prompt: |
+                      system:
+                      Is this joke funny?
+                      user:
+                      {{ input }}
+                  effort: min
+                  github_token: ${{ secrets.GITHUB_TOKEN }}
+                  debug: "script"
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -7,6 +7,9 @@ on:
             - dev
 permissions:
     contents: write
+concurrency:
+    group: ${{ github.workflow }}-${{ github.ref }}
+    cancel-in-progress: true
 defaults:
     run:
         working-directory: ./docs
@@ -20,7 +23,7 @@ jobs:
                   fetch-depth: 10
             - uses: actions/setup-node@v4
               with:
-                  node-version: "20"
+                  node-version: "22"
                   cache: npm
             - run: npm ci
             - name: Build docs

diff --git a/.github/workflows/genai-iat.yml b/.github/workflows/genai-iat.yml
diff --git a/.github/workflows/genai-issue-labeller.yml b/.github/workflows/genai-issue-labeller.yml
@@ -0,0 +1,19 @@
+name: GenAI Issue Labeller
+on:
+    issues:
+        types: [opened] # runs once, when an issue is first opened
+permissions:
+    contents: read
+    issues: write # presumably needed so the action can apply labels; confirm against the action's docs
+    models: read
+concurrency:
+    # One group per issue: github.ref is the default branch for every issues event, so keying on it let a new issue cancel another issue's run.
+    group: ${{ github.workflow }}-${{ github.event.issue.number }}
+    cancel-in-progress: true
+jobs:
+    genai-issue-labeller:
+        runs-on: ubuntu-latest
+        steps:
+            - uses: actions/checkout@v4
+            - uses: pelikhan/action-genai-issue-labeller@v0
+              with:
+                  github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/genai-pr.yml b/.github/workflows/genai-pr.yml
diff --git a/.github/workflows/genai-pull-request-descriptor.yml b/.github/workflows/genai-pull-request-descriptor.yml
@@ -0,0 +1,20 @@
+name: GenAI Pull Request Descriptor
+on:
+  pull_request:
+    types: [opened, reopened, ready_for_review] # no "synchronize": later pushes to the PR do not re-run this
+permissions:
+  contents: read
+  pull-requests: write
+  models: read
+concurrency: # at most one active run per workflow and ref; a newer run cancels the one in progress
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+jobs:
+  generate-pull-request-description:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: pelikhan/action-genai-pull-request-descriptor@v0
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+
diff --git a/.github/workflows/genai-test.yml b/.github/workflows/genai-test.yml