Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llm_action/.env.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# LLM
ANTHROPIC_API_KEY = "sk-ant-api03-..."
GEMINI_API_KEY = "..."
GROQ_API_KEY = "gsk_..."

# MLIR
MLIR_SHARED_LIBS=/path/to/llvm-project/build/lib/libomp.so,/path/to/llvm-project/build/lib/libmlir_c_runner_utils.so,/path/to/llvm-project/build/lib/libmlir_runner_utils.so
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module {
// Timer hook resolved at runtime; per the runner libs referenced in .env
// (libmlir_c_runner_utils.so) this presumably returns a nanosecond timestamp
// — confirm against the execution harness.
func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}
// Benchmark entry point: times one 1x1 conv (NCHW input, FCHW filter)
// N=128, C=32, H=W=7, F=256; returns elapsed time in nanoTime units (i64).
// Buffers are caller-allocated; %arg2 is written in place.
func.func @main(%arg0: memref<128x32x7x7xf64>, %arg1: memref<256x32x1x1xf64>, %arg2: memref<128x256x7x7xf64>) -> i64 attributes {llvm.emit_c_interface} {
%0 = call @nanoTime() : () -> i64
// Unit strides/dilations; 1x1 kernel keeps output spatial dims at 7x7.
// tag = "operation_0" labels the op so transform scripts can match it.
linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>, tag = "operation_0"} ins(%arg0, %arg1 : memref<128x32x7x7xf64>, memref<256x32x1x1xf64>) outs(%arg2 : memref<128x256x7x7xf64>)
%2 = call @nanoTime() : () -> i64
// Elapsed = end - start.
%3 = arith.subi %2, %0 : i64
return %3 : i64
}
}
29 changes: 29 additions & 0 deletions llm_action/data/memref/conv2d/conv_2d_nchw_fchw_template.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
module {
// Runtime-provided timer (presumably nanoseconds — confirm against runner).
func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}
// TEMPLATE: [N],[C],[H],[W],[F],[KH],[KW],[OH],[OW] are placeholders to be
// substituted with concrete integers before this file is valid MLIR.
// Instantiations must satisfy OH = H - KH + 1 and OW = W - KW + 1
// (unit strides/dilations below).
func.func @main(
%arg0: memref<[N]x[C]x[H]x[W]xf64>,
%arg1: memref<[F]x[C]x[KH]x[KW]xf64>,
%arg2: memref<[N]x[F]x[OH]x[OW]xf64>
) -> i64
attributes {llvm.emit_c_interface} {
%0 = call @nanoTime() : () -> i64
// tag = "operation_0" lets transform scripts match this op by attribute.
linalg.conv_2d_nchw_fchw
{ dilations = dense<1> : tensor<2xi64>,
strides = dense<1> : tensor<2xi64>,
tag = "operation_0"
}
ins(
%arg0, %arg1 :
memref<[N]x[C]x[H]x[W]xf64>,
memref<[F]x[C]x[KH]x[KW]xf64>
)
outs(
%arg2 : memref<[N]x[F]x[OH]x[OW]xf64>
)

%2 = call @nanoTime() : () -> i64
// Elapsed = end - start, returned to the harness.
%3 = arith.subi %2, %0 : i64

return %3 : i64
}
}
25 changes: 25 additions & 0 deletions llm_action/data/memref/generic/generic_8_8_16_8_32.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module {
// Runtime-provided timer (presumably nanoseconds — confirm against runner).
func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}
// Benchmark: elementwise accumulate over a rank-5 buffer,
// %arg1[i] = %arg1[i] + %arg0[i], timed; returns elapsed i64.
func.func @main(
%arg0: memref<8x8x16x8x32xf64>,
%arg1: memref<8x8x16x8x32xf64>
) -> i64 attributes {llvm.emit_c_interface} {
%t0 = call @nanoTime() : () -> i64
// Both maps are identity over (a,b,c,d,e) and all iterators are parallel,
// so this is a pure elementwise op with no reduction dimension.
linalg.generic {
indexing_maps = [
affine_map<(a,b,c,d,e) -> (a,b,c,d,e)>,
affine_map<(a,b,c,d,e) -> (a,b,c,d,e)>
],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
tag = "operation_0"
} ins(%arg0 : memref<8x8x16x8x32xf64>)
outs(%arg1 : memref<8x8x16x8x32xf64>) {
// %in comes from %arg0, %acc is the current value of %arg1 at this index.
^bb0(%in: f64, %acc: f64):
%sum = arith.addf %acc, %in : f64
linalg.yield %sum : f64
}
%t1 = call @nanoTime() : () -> i64
%dt = arith.subi %t1, %t0 : i64
return %dt : i64
}
}
25 changes: 25 additions & 0 deletions llm_action/data/memref/generic/generic_template.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module {
// Runtime-provided timer (presumably nanoseconds — confirm against runner).
func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}
// TEMPLATE: [A]..[E] are placeholders for concrete dimension sizes; both
// operands must be instantiated with the same shape (identity indexing maps).
// Computes %arg1 += %arg0 elementwise and returns the elapsed i64.
func.func @main(
%arg0: memref<[A]x[B]x[C]x[D]x[E]xf64>,
%arg1: memref<[A]x[B]x[C]x[D]x[E]xf64>
) -> i64 attributes {llvm.emit_c_interface} {
%t0 = call @nanoTime() : () -> i64
// Identity maps + all-parallel iterators: pure elementwise traversal.
linalg.generic {
indexing_maps = [
affine_map<(a,b,c,d,e) -> (a,b,c,d,e)>,
affine_map<(a,b,c,d,e) -> (a,b,c,d,e)>
],
iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"],
tag = "operation_0"
} ins(%arg0 : memref<[A]x[B]x[C]x[D]x[E]xf64>)
outs(%arg1 : memref<[A]x[B]x[C]x[D]x[E]xf64>) {
^bb0(%in: f64, %acc: f64):
%sum = arith.addf %acc, %in : f64
linalg.yield %sum : f64
}
%t1 = call @nanoTime() : () -> i64
%dt = arith.subi %t1, %t0 : i64
return %dt : i64
}
}
10 changes: 10 additions & 0 deletions llm_action/data/memref/matmul/matmul_128_256_128.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module {
  // Timer hook supplied by the MLIR runner utilities (see MLIR_SHARED_LIBS).
  func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}

  // Times a single (128x256) x (256x128) -> (128x128) matmul in f64.
  // Accumulates into caller-provided %arg2; returns end - start (i64).
  func.func @main(%arg0: memref<128x256xf64>, %arg1: memref<256x128xf64>, %arg2: memref<128x128xf64>) -> i64 attributes {llvm.emit_c_interface} {
    %t_start = call @nanoTime() : () -> i64
    // tag = "operation_0" marks the op for transform-dialect matching.
    linalg.matmul {tag = "operation_0"}
      ins(%arg0, %arg1 : memref<128x256xf64>, memref<256x128xf64>)
      outs(%arg2 : memref<128x128xf64>)
    %t_end = call @nanoTime() : () -> i64
    %elapsed = arith.subi %t_end, %t_start : i64
    return %elapsed : i64
  }
}
10 changes: 10 additions & 0 deletions llm_action/data/memref/matmul/matmul_24576_768_384.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module {
  // Timer hook supplied by the MLIR runner utilities (see MLIR_SHARED_LIBS).
  func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}

  // Times a single (24576x768) x (768x384) -> (24576x384) matmul in f64.
  // Accumulates into caller-provided %arg2; returns end - start (i64).
  func.func @main(%arg0: memref<24576x768xf64>, %arg1: memref<768x384xf64>, %arg2: memref<24576x384xf64>) -> i64 attributes {llvm.emit_c_interface} {
    %t_start = call @nanoTime() : () -> i64
    // tag = "operation_0" marks the op for transform-dialect matching.
    linalg.matmul {tag = "operation_0"}
      ins(%arg0, %arg1 : memref<24576x768xf64>, memref<768x384xf64>)
      outs(%arg2 : memref<24576x384xf64>)
    %t_end = call @nanoTime() : () -> i64
    %elapsed = arith.subi %t_end, %t_start : i64
    return %elapsed : i64
  }
}
10 changes: 10 additions & 0 deletions llm_action/data/memref/matmul/matmul_256_512_1024.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module {
  // Timer hook supplied by the MLIR runner utilities (see MLIR_SHARED_LIBS).
  func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}

  // Times a single (256x512) x (512x1024) -> (256x1024) matmul in f64.
  // Accumulates into caller-provided %arg2; returns end - start (i64).
  func.func @main(%arg0: memref<256x512xf64>, %arg1: memref<512x1024xf64>, %arg2: memref<256x1024xf64>) -> i64 attributes {llvm.emit_c_interface} {
    %t_start = call @nanoTime() : () -> i64
    // tag = "operation_0" marks the op for transform-dialect matching.
    linalg.matmul {tag = "operation_0"}
      ins(%arg0, %arg1 : memref<256x512xf64>, memref<512x1024xf64>)
      outs(%arg2 : memref<256x1024xf64>)
    %t_end = call @nanoTime() : () -> i64
    %elapsed = arith.subi %t_end, %t_start : i64
    return %elapsed : i64
  }
}
10 changes: 10 additions & 0 deletions llm_action/data/memref/matmul/matmul_512_512_512.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module {
  // Timer hook supplied by the MLIR runner utilities (see MLIR_SHARED_LIBS).
  func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}

  // Times a single square (512x512) x (512x512) -> (512x512) matmul in f64.
  // Accumulates into caller-provided %arg2; returns end - start (i64).
  func.func @main(%arg0: memref<512x512xf64>, %arg1: memref<512x512xf64>, %arg2: memref<512x512xf64>) -> i64 attributes {llvm.emit_c_interface} {
    %t_start = call @nanoTime() : () -> i64
    // tag = "operation_0" marks the op for transform-dialect matching.
    linalg.matmul {tag = "operation_0"}
      ins(%arg0, %arg1 : memref<512x512xf64>, memref<512x512xf64>)
      outs(%arg2 : memref<512x512xf64>)
    %t_end = call @nanoTime() : () -> i64
    %elapsed = arith.subi %t_end, %t_start : i64
    return %elapsed : i64
  }
}
14 changes: 14 additions & 0 deletions llm_action/data/memref/matmul/matmul_template.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module {
// Runtime-provided timer (presumably nanoseconds — confirm against runner).
func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}
// TEMPLATE: [I],[J],[K] are placeholders to be substituted with concrete
// integers before this is valid MLIR. Computes a (IxJ) x (JxK) -> (IxK)
// f64 matmul into caller-provided %arg2 and returns the elapsed i64.
func.func @main(
%arg0: memref<[I]x[J]xf64>,
%arg1: memref<[J]x[K]xf64>,
%arg2: memref<[I]x[K]xf64>
) -> i64 attributes {llvm.emit_c_interface} {
%0 = call @nanoTime() : () -> i64
// tag = "operation_0" labels the op for transform-dialect matching.
linalg.matmul {tag = "operation_0"} ins(%arg0, %arg1 : memref<[I]x[J]xf64>, memref<[J]x[K]xf64>) outs(%arg2 : memref<[I]x[K]xf64>)
%2 = call @nanoTime() : () -> i64
// Elapsed = end - start.
%3 = arith.subi %2, %0 : i64
return %3 : i64
}
}
2 changes: 1 addition & 1 deletion llm_action/docs/MCP_REFERENCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ Args:
Returns:
float: the median execution time in milliseconds.

## measure_speedup(mlir_base_execution_time: float, mlir_optimized_execution_time: float, torch_execution_time: float) -> dict[str, float]
## measure_speedup(mlir_base_execution_time: float, mlir_optimized_execution_time: float, torch_execution_time: Optional[float] = None) -> dict[str, float]
Measures the speedup achieved by MLIR transformations.

This tool compares the execution time of base code against transformed code
Expand Down
17 changes: 15 additions & 2 deletions llm_action/playground/actions/tests/tiling.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from llm_action.src.models import KernelType
from llm_action.src.utils.persistence import load_kernel_code
from llm_action.src.execution.mlir_execution import execute_mlir

from llm_action.playground.actions.candidates.Tiling_af31 import TilingAction
from llm_action.playground.actions.candidates.Tile import Tile
Expand All @@ -24,17 +25,29 @@
ACTION = Tiling

for kernel_type in [KernelType.MATMUL, KernelType.CONV2D, KernelType.GENERIC]:
print(f"--- Testing Tiling Action on {kernel_type.value} Kernel ---\n")
print(f"--- Testing {ACTION.__name__} on {kernel_type.value} Kernel ---\n")
code = load_kernel_code(kernel_type)
print(f"Original Code:\n{code}\n")



print(f"Original Execution; Success = {success}, Time = {original_time_ns} ns")

parameters = params_per_kernel[kernel_type]

print(f"Using Parameters: {parameters}\n")

if ACTION.precondition(code, parameters):
transformed_code = ACTION.implement(code, parameters)

print(f"Transformed Code:\n{transformed_code}\n")

transformed_time_ns, success = execute_mlir(transformed_code)

print(f"Transformed Execution; Success = {success}, Time = {transformed_time_ns} ns")

print(f"Speedup = {(original_time_ns / transformed_time_ns):.4f}")

if ACTION.postcondition(code, transformed_code, parameters):
print(f"Postcondition satisfied: {ACTION.__name__} applied successfully.")
else:
Expand Down
7 changes: 3 additions & 4 deletions llm_action/resources/prompts/v1/action_enumeration.md
Original file line number Diff line number Diff line change
Expand Up @@ -331,12 +331,11 @@ Acceptable kernel-specific examples (when framed generically):
## Examples (non-exhaustive):
- Tiling / blocking
- Interchange (loop permutation)
- Fusion (producer-consumer)
- Vectorization (SIMD-friendly restructuring)
- Parallelization / distribution
- Promotion
- Packing / layout transformation
- Unrolling / jamming / peeling
- Decomposition of complex ops
- Bufferization strategy (conceptual)
- Canonicalization / simplification (conceptual)
- Special kernel-specific operations (e.g., im2col for convolution)
Expand Down Expand Up @@ -395,8 +394,8 @@ class ActionEnumeration(BaseModel):

# Output Constraints

- Produce **2-3 optimization intents**.
- Each intent must contain **2-3 transformations**.
- Produce **3-5 optimization intents**.
- Each intent must contain **3-5 transformations**.
- Use consistent transformation names across intents (avoid duplicates with different names).
- Keep descriptions concise (1-2 sentences).
- Do **not** include parameter knobs, preconditions, ordering rules, or code.
Expand Down
41 changes: 26 additions & 15 deletions llm_action/resources/prompts/v1/action_implementation.md
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,21 @@ You are **not** responsible for:
Your job is to turn **one abstract transformation idea** into
**one concrete executable action**.

# Hardware Specifications
- Primary target: **HPC-class CPU** — specifically **Intel Xeon E5-2680 v4 (Broadwell-class)**.
- Topology:
* **28 physical cores** (2 sockets x 14 cores), **2 NUMA nodes**.
* **No SMT / Hyper-threading disabled** (threads per core = 1).
- SIMD / ISA capabilities:
* **AVX2 + FMA available**.
* **No AVX-512** (do not assume AVX-512 vector widths, masks, or AVX-512-specific lowering).
* Practical vector lane guidance:
- FP32: typically 8 lanes per vector (256-bit)
- FP64: typically 4 lanes per vector (256-bit)
- Cache hierarchy characteristics:
* L1d ~32KB per core, L2 ~256KB per core, shared L3 per socket (~tens of MB).
- Number of cores in the execution environment (submitted MLIR/PyTorch jobs): **16 physical cores**.

# Your Task

You will be given the following inputs:
Expand Down Expand Up @@ -337,7 +352,7 @@ Each Action must define the following conceptual stages:

## Tooling Available (Allowed and Encouraged)

You may use the following tool to validate the MLIR transform while synthesizing it:
You may use the following MCP tools to validate the MLIR transform while synthesizing it:

- `delegate_documentation_lookup(task: str) -> str`
Delegates Transform dialect documentation lookup to a deterministic retrieval agent. Example tasks:
Expand All @@ -346,13 +361,13 @@ You may use the following tool to validate the MLIR transform while synthesizing
- "What is the Transform dialect op for loop interchange?"
This lookup agent provides authoritative, pre-scraped MLIR Transform dialect documentation, including exact operation names, required handles, key attributes, and minimal Transform IR skeletons, and should be used to ground Transform dialect usage before implementation.

- `transform_code(code: str, transformation_code: str) -> str`
- `transform_mlir_code(code: str, transformation_code: str) -> str`
Applies Transform dialect code and returns transformed MLIR.

- `execute_code(code: str) -> tuple[int, bool]`
- `execute_mlir_code(code: str) -> tuple[float, bool]`
Executes the payload and returns (execution_time in ms, success_flag).

- `measure_speedup(base_execution_time: float, execution_time: float) -> float`
- `measure_speedup(mlir_base_execution_time: float, mlir_optimized_execution_time: float) -> dict[str, float]`
Computes the relative speedup between baseline and transformed execution times (per the MCP reference, this returns a dict of speedup metrics and also accepts an optional `torch_execution_time`).

Use these tools to ensure your transform snippet is syntactically valid, changes the IR when it should, and preserves executability when appropriate. Make sure to input actual MLIR code instances (actual numbers instead of [I], [OH], etc.).
Expand All @@ -370,17 +385,17 @@ For each kernel instance you test during synthesis, follow systematically this p
call `delegate_documentation_lookup(...)` before writing or revising transform IR.

2. **Baseline execution sanity**
- Call `execute_code(original_code)`.
- Call `execute_mlir_code(original_code)`.
- Require `success_flag == True`.
- If baseline execution fails, do not proceed with transform testing on that instance.

3. **Transform application sanity**
- Call `transform_code(original_code, transform_ir)`.
- Call `transform_mlir_code(original_code, transform_ir)`.
- Require that the returned MLIR differs from the input (`transformed.strip() != original.strip()`).
- If the transform produces identical code or throws, treat it as a failed transform attempt.

4. **Post-transform execution sanity**
- Call `execute_code(transformed_code)`.
- Call `execute_mlir_code(transformed_code)`.
- Require `success_flag == True`.
- If execution fails, the transform is not acceptable and must be revised.

Expand All @@ -401,18 +416,14 @@ and must NOT materialize large tensor tiles as vectors.
When a transformation introduces `vector<...>` types, you MUST ensure:

1) **Bound total vector size**
- Let `N = product(static vector dimensions)`.
- Limits by element type:
- `f64` / `i64`: `N ≤ 16`
- `f32` / `i32`: `N ≤ 32`
- `f16` / `bf16` / `i16`: `N ≤ 64`
- `i8`: `N ≤ 128`
- Let `N` be the product of all static vector dimensions (i.e. the total number of elements in the vector).
- Limit: `N ≤ 1024`.
- If any vector exceeds its bound → **reject the candidate immediately**.

2) **Limit vector rank**
- Prefer rank-1 vectors: `vector<kxf32>`
- Allow rank-2 and rank-3 vectors only if small (e.g. `vector<4x8xf32>, vector<4x4x4xf32>`).
- Rank ≥ 4 vectors are **disallowed**, regardless of element count.
- Allow rank-2 vectors only if they are small (e.g. `vector<4x8xf32>`).
- Rank ≥ 3 vectors are **disallowed**, unless they are very small (e.g. `vector<2x2x2xf32>` or `vector<4x4x4xf32>`).

3) **No tile-as-vector lowering**
- Vectors resembling whole tiles or buffers
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
TASK:
How to do vectorization in MLIR Transform dialect?

RESPONSE:
{
"error": {
"code": 429,
"message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash\nPlease retry in 443.929576ms.",
"status": "RESOURCE_EXHAUSTED",
"details": [
{
"@type": "type.googleapis.com/google.rpc.Help",
"links": [
{
"description": "Learn more about Gemini API quotas",
"url": "https://ai.google.dev/gemini-api/docs/rate-limits"
}
]
},
{
"@type": "type.googleapis.com/google.rpc.QuotaFailure",
"violations": [
{
"quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",
"quotaId": "GenerateRequestsPerDayPerProjectPerModel-FreeTier",
"quotaDimensions": {
"location": "global",
"model": "gemini-2.5-flash"
},
"quotaValue": "20"
}
]
},
{
"@type": "type.googleapis.com/google.rpc.RetryInfo",
"retryDelay": "0s"
}
]
}
}
Loading