From d9ab1cb072af8ad50e567f7293ad93ae364e7054 Mon Sep 17 00:00:00 2001
From: logansg
Date: Sat, 4 Apr 2026 14:23:25 -0400
Subject: [PATCH 1/2] discourage wrappers

---
 src/generator/prompts/GeneratorSystemPrompt.md       | 2 ++
 src/generator/prompts/TritonGeneratorSystemPrompt.md | 2 ++
 src/optimizer/backends/cuda/prompts.py               | 1 +
 src/optimizer/backends/triton/prompts.py              | 1 +
 4 files changed, 6 insertions(+)

diff --git a/src/generator/prompts/GeneratorSystemPrompt.md b/src/generator/prompts/GeneratorSystemPrompt.md
index 5aa59091..b963e9e8 100644
--- a/src/generator/prompts/GeneratorSystemPrompt.md
+++ b/src/generator/prompts/GeneratorSystemPrompt.md
@@ -24,6 +24,8 @@ Your job is to generate a **single compilable CUDA PyTorch extension source file
 
 4. No text, explanation, or comments outside the code block.
 
+5. Do NOT delegate the target operation to PyTorch, ATen, Triton built-ins, or any wrapper around them; implement the computation directly in the kernel.
+
 -----------------------------------------------
 EXAMPLE CODE STRUCTURE
 -----------------------------------------------
diff --git a/src/generator/prompts/TritonGeneratorSystemPrompt.md b/src/generator/prompts/TritonGeneratorSystemPrompt.md
index f8f686f9..b7d33070 100644
--- a/src/generator/prompts/TritonGeneratorSystemPrompt.md
+++ b/src/generator/prompts/TritonGeneratorSystemPrompt.md
@@ -25,6 +25,8 @@ Your job is to generate a **single valid OpenAI Triton kernel file** named `kern
 
 4. No text, explanation, or comments outside the code block.
 
+5. Do NOT delegate the target operation to PyTorch, ATen, Triton built-ins, or any wrapper around them; implement the computation directly in the kernel.
+
 -----------------------------------------------
 EXAMPLE CODE STRUCTURE
 -----------------------------------------------
diff --git a/src/optimizer/backends/cuda/prompts.py b/src/optimizer/backends/cuda/prompts.py
index b9326eea..9d87efe7 100644
--- a/src/optimizer/backends/cuda/prompts.py
+++ b/src/optimizer/backends/cuda/prompts.py
@@ -42,6 +42,7 @@ def get_sys_prompt() -> str:
 3. **Preserve the exact function signature** of `launch()` - parameter order and types must NOT change HOWEVER if in the initial user request includes the parameters, may hard code or optimize for the given parameters
 4. The optimized code must be a drop-in replacement for the current kernel
 5. Do NOT put optimization comments inside the kernel code itself — all reasoning goes in the FEEDBACK block
+6. Do NOT delegate the target operation to PyTorch, ATen, Triton built-ins, or any wrapper around them; implement the computation directly in the kernel.
 
 -----------------------------------------------------------------------------------------------
 STARTING IDEAS FOR OPTIMIZATION STRATEGIES TO CONSIDER (but also be creative AND MAKE YOUR OWN
diff --git a/src/optimizer/backends/triton/prompts.py b/src/optimizer/backends/triton/prompts.py
index ed0e104a..9998b8c9 100644
--- a/src/optimizer/backends/triton/prompts.py
+++ b/src/optimizer/backends/triton/prompts.py
@@ -46,6 +46,7 @@ def get_sys_prompt() -> str:
 3. **Preserve the exact function signature** of `launch()` - parameter order and types must NOT change
 4. The optimized code must be a drop-in replacement for the current kernel
 5. Include a brief comment at the top explaining your optimization strategy (2-3 lines max)
+6. Do NOT delegate the target operation to PyTorch, ATen, Triton built-ins, or any wrapper around them; implement the computation directly in the kernel.
 
 -----------------------------------------------
 TRITON PROGRAMMING MODEL

From aa6eb7769fdfa3df0faa9c4d4dc39e2edf8eaa02 Mon Sep 17 00:00:00 2001
From: logansg
Date: Sat, 4 Apr 2026 15:09:06 -0400
Subject: [PATCH 2/2] sharpen prompt

---
 src/generator/prompts/GeneratorSystemPrompt.md       | 2 +-
 src/generator/prompts/TritonGeneratorSystemPrompt.md | 2 +-
 src/optimizer/backends/cuda/prompts.py               | 2 +-
 src/optimizer/backends/triton/prompts.py              | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/generator/prompts/GeneratorSystemPrompt.md b/src/generator/prompts/GeneratorSystemPrompt.md
index b963e9e8..c006d9b5 100644
--- a/src/generator/prompts/GeneratorSystemPrompt.md
+++ b/src/generator/prompts/GeneratorSystemPrompt.md
@@ -24,7 +24,7 @@ Your job is to generate a **single compilable CUDA PyTorch extension source file
 
 4. No text, explanation, or comments outside the code block.
 
-5. Do NOT delegate the target operation to PyTorch, ATen, Triton built-ins, or any wrapper around them; implement the computation directly in the kernel.
+5. Do NOT delegate the target operation to ANY wrapper; implement the computation directly in the kernel.
 
 -----------------------------------------------
 EXAMPLE CODE STRUCTURE
diff --git a/src/generator/prompts/TritonGeneratorSystemPrompt.md b/src/generator/prompts/TritonGeneratorSystemPrompt.md
index b7d33070..f1fec8b8 100644
--- a/src/generator/prompts/TritonGeneratorSystemPrompt.md
+++ b/src/generator/prompts/TritonGeneratorSystemPrompt.md
@@ -25,7 +25,7 @@ Your job is to generate a **single valid OpenAI Triton kernel file** named `kern
 
 4. No text, explanation, or comments outside the code block.
 
-5. Do NOT delegate the target operation to PyTorch, ATen, Triton built-ins, or any wrapper around them; implement the computation directly in the kernel.
+5. Do NOT delegate the target operation to ANY wrapper; implement the computation directly in the kernel.
 
 -----------------------------------------------
 EXAMPLE CODE STRUCTURE
diff --git a/src/optimizer/backends/cuda/prompts.py b/src/optimizer/backends/cuda/prompts.py
index 9d87efe7..6acf57ef 100644
--- a/src/optimizer/backends/cuda/prompts.py
+++ b/src/optimizer/backends/cuda/prompts.py
@@ -42,7 +42,7 @@ def get_sys_prompt() -> str:
 3. **Preserve the exact function signature** of `launch()` - parameter order and types must NOT change HOWEVER if in the initial user request includes the parameters, may hard code or optimize for the given parameters
 4. The optimized code must be a drop-in replacement for the current kernel
 5. Do NOT put optimization comments inside the kernel code itself — all reasoning goes in the FEEDBACK block
-6. Do NOT delegate the target operation to PyTorch, ATen, Triton built-ins, or any wrapper around them; implement the computation directly in the kernel.
+6. Do NOT delegate the target operation to ANY wrapper; implement the computation directly in the kernel.
 
 -----------------------------------------------------------------------------------------------
 STARTING IDEAS FOR OPTIMIZATION STRATEGIES TO CONSIDER (but also be creative AND MAKE YOUR OWN
diff --git a/src/optimizer/backends/triton/prompts.py b/src/optimizer/backends/triton/prompts.py
index 9998b8c9..9255572c 100644
--- a/src/optimizer/backends/triton/prompts.py
+++ b/src/optimizer/backends/triton/prompts.py
@@ -46,7 +46,7 @@ def get_sys_prompt() -> str:
 3. **Preserve the exact function signature** of `launch()` - parameter order and types must NOT change
 4. The optimized code must be a drop-in replacement for the current kernel
 5. Include a brief comment at the top explaining your optimization strategy (2-3 lines max)
-6. Do NOT delegate the target operation to PyTorch, ATen, Triton built-ins, or any wrapper around them; implement the computation directly in the kernel.
+6. Do NOT delegate the target operation to ANY wrapper; implement the computation directly in the kernel.
 
 -----------------------------------------------
 TRITON PROGRAMMING MODEL
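
For illustration only (not part of either patch): a minimal sketch of what a Triton `kernel.py` satisfying the new rule might look like, computing an element-wise add directly with `tl.load`/`tl.store` rather than delegating to a wrapper such as `torch.add`. The `launch()` signature, `add_kernel` name, and `BLOCK_SIZE` choice here are assumptions for the sketch, not taken from the repository's prompts or example code.

```python
# Hypothetical example: the target operation (element-wise add) is computed
# inside the Triton kernel itself; no torch/ATen wrapper performs the math.
import torch
import triton
import triton.language as tl


@triton.jit
def add_kernel(x_ptr, y_ptr, out_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    pid = tl.program_id(axis=0)                       # one program per block of elements
    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements                       # guard the ragged last block
    x = tl.load(x_ptr + offsets, mask=mask)
    y = tl.load(y_ptr + offsets, mask=mask)
    tl.store(out_ptr + offsets, x + y, mask=mask)     # computation done in-kernel


def launch(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Assumed signature; the real prompts fix their own launch() contract.
    out = torch.empty_like(x)
    n_elements = out.numel()
    grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
    add_kernel[grid](x, y, out, n_elements, BLOCK_SIZE=1024)
    return out
```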