diff --git a/.github/workflows/single-card-demo-tests-impl.yaml b/.github/workflows/single-card-demo-tests-impl.yaml
index 92c115ab9c96..c5782e2947b7 100644
--- a/.github/workflows/single-card-demo-tests-impl.yaml
+++ b/.github/workflows/single-card-demo-tests-impl.yaml
@@ -88,6 +88,7 @@ jobs:
           #          # Moved to t3k tests until OOM on single card runners resolved
                     # { name: "qwen7b", runner-label: "N300", performance: false, cmd: run_qwen7b_func, owner_id: U03PUAKE719}, # Mark O'Connor
                     { name: "qwen25_vl", runner-label: "N300", performance: false, cmd: run_qwen25_vl_func, owner_id: U07RY6B5FLJ},  #Gongyu Wang
+                    # { name: "gemma3_1b", runner-label: "N150", performance: false, cmd: run_gemma3_1b_func, owner_id:}, # TODO: fill in owner_id before enabling this entry
         ]
     name: ${{ matrix.test-group.name }}-${{ matrix.test-group.runner-label }}-${{ (matrix.test-group.performance && 'perf') || 'func' }}
     env:
diff --git a/models/tt_transformers/PERF.md b/models/tt_transformers/PERF.md
index 737ef759fdbb..e301d5b3a7b3 100644
--- a/models/tt_transformers/PERF.md
+++ b/models/tt_transformers/PERF.md
@@ -45,6 +45,7 @@ This configuration uses bfp4 MLP and bfp8 attention weights for all models excep
 | Mistral-7B        | N150        | 95        | 99        | 29.75         | 100.24    |
 | Mistral-7B        | N300        | 95        | 99        | 47.01         | 65.95     |
 | Mistral-7B        | T3K         | 95        | 99        | 67.82         | 53.93     |
+| gemma-3-1b        | N150        | 30        | 40        | 40.00         | 40.00     |
 
 
 ## Accuracy
@@ -82,6 +83,7 @@ Llama 3 models test as insensitive to attention precision and so we use bfp8 att
 | Mistral-7B        | N150        | 95        | 99        | 29.75         | 100.24    |
 | Mistral-7B        | N300        | 95        | 99        | 47.01         | 65.95     |
 | Mistral-7B        | T3K         | 95        | 99        | 67.82         | 53.93     |
+| gemma-3-1b        | N150        | 30        | 40        | 40.00         | 40.00     |
 
 ##  Long-context (64K Tokens)
 
diff --git a/models/tt_transformers/demo/simple_text_demo.py b/models/tt_transformers/demo/simple_text_demo.py
index c58504228ef4..b8326b2861c9 100644
--- a/models/tt_transformers/demo/simple_text_demo.py
+++ b/models/tt_transformers/demo/simple_text_demo.py
@@ -945,7 +945,15 @@ def test_demo_text(
     )
 
     # Benchmark targets
-    supported_models = ["Llama-3.2-1B", "Llama-3.2-3B", "Llama-3.1-8B", "Llama-3.2-11B", "Llama-3.1-70B", "Mistral-7B"]
+    supported_models = [
+        "Llama-3.2-1B",
+        "Llama-3.2-3B",
+        "Llama-3.1-8B",
+        "Llama-3.2-11B",
+        "Llama-3.1-70B",
+        "Mistral-7B",
+        "gemma-3-1b",
+    ]
     supported_devices = ["N150", "P100", "P150", "P300", "N300", "P150x4", "T3K", "TG"]
 
     tt_device_name = determine_device_name(mesh_device)  # submesh device should not decide performance target
@@ -994,6 +1002,8 @@ def test_demo_text(
             "N300_Mistral-7B": 38,  # TODO Update target
             "T3K_Mistral-7B": 45,  # TODO Update target
             "TG_Mistral-7B": 45,  # TODO Update target
+            # Gemma-3 targets
+            "N150_gemma-3-1b": 25,
         }
         if model_device_key in dict_target_decode_tok_s_u:
             target_decode_tok_s_u = dict_target_decode_tok_s_u[model_device_key]
@@ -1075,6 +1085,7 @@ def test_demo_text(
                 # "T3K_Qwen2.5-Coder-32B": 180,  # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24754)
                 # "T3K_Qwen2.5-72B": 211,  # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24754)
                 # "T3K_Qwen3-32B": 250, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24754)
+                "N150_gemma-3-1b": 100,
             }
             ci_target_decode_tok_s_u = {
                 # N150 targets - higher is better
@@ -1082,6 +1093,7 @@ def test_demo_text(
                 "N150_Llama-3.2-3B": 35,
                 "N150_Llama-3.1-8B": 21,
                 "N150_Mistral-7B": 23,
+                "N150_gemma-3-1b": 25,
                 # N300 targets
                 "N300_Qwen2.5-7B": 20,
                 # T3K targets
diff --git a/models/tt_transformers/tests/reference_outputs/gemma-3-1b-it.refpt b/models/tt_transformers/tests/reference_outputs/gemma-3-1b-it.refpt
new file mode 100644
index 000000000000..d57fab334f4b
Binary files /dev/null and b/models/tt_transformers/tests/reference_outputs/gemma-3-1b-it.refpt differ
diff --git a/models/tt_transformers/tests/test_accuracy.py b/models/tt_transformers/tests/test_accuracy.py
index 78de89940988..2515dab97864 100644
--- a/models/tt_transformers/tests/test_accuracy.py
+++ b/models/tt_transformers/tests/test_accuracy.py
@@ -245,6 +245,19 @@ def test_tt_model_acc(
             theta=model_args.rope_theta,
             rope_scaling=model_args.rope_scaling,
         )
+
+        if model_args.rope_local_theta is not None:
+            rope_setup_prefill_local = get_prefill_rot_mat(
+                model_args.head_dim,
+                mesh_device,
+                prefill_lens[0],
+                model_args.rope_local_theta,
+                model_args.rope_scaling_factor,
+                model_args.orig_context_len,
+            )
+        else:
+            rope_setup_prefill_local = None
+
         prefill_input = model_args.prepare_residual_tensor_prefill(
             pt_prefill_input[batch_id],
         )
@@ -252,7 +265,7 @@ def test_tt_model_acc(
         tt_out = tt_model(
             prefill_input,
             current_pos=None,
-            rot_mats=rot_mats_prefill,
+            rot_mats=[rot_mats_prefill, rope_setup_prefill_local],
             user_id=batch_id,
             mode="prefill",
             page_table=page_table_tt,
@@ -280,6 +293,7 @@ def test_tt_model_acc(
 
     # Get cos/sin matrices for the current position of each user
     rot_mats = tt_model.rope_setup.get_rot_mats(current_pos)
+    rot_mats_local = None if tt_model.rope_setup_local is None else tt_model.rope_setup_local.get_rot_mats(current_pos)
 
     # Print table header
     if use_reference_file:
@@ -310,7 +324,7 @@ def test_tt_model_acc(
         tt_out = tt_model(
             decode_input,
             current_pos_tensor,
-            rot_mats=rot_mats,
+            rot_mats=[rot_mats, rot_mats_local],
             mode="decode",
             page_table=page_table_tt,
         )
@@ -351,6 +365,9 @@ def test_tt_model_acc(
         # Update rot_mats for next iteration
         current_pos += 1
         rot_mats = tt_model.rope_setup.get_rot_mats(current_pos)
+        rot_mats_local = (
+            None if tt_model.rope_setup_local is None else tt_model.rope_setup_local.get_rot_mats(current_pos)
+        )
 
         # Modify the accuracy checking section when using reference text
         if not use_reference_file:
diff --git a/tests/scripts/single_card/run_single_card_demo_tests.sh b/tests/scripts/single_card/run_single_card_demo_tests.sh
index a058614bb532..e75b77457c2f 100755
--- a/tests/scripts/single_card/run_single_card_demo_tests.sh
+++ b/tests/scripts/single_card/run_single_card_demo_tests.sh
@@ -49,6 +49,13 @@ run_qwen25_vl_func() {
   fi
 }
 
+
+run_gemma3_1b_func() {
+  # Gemma-3-1B-it text demo with MESH_DEVICE=N150: runs the simple_text_demo.py tests selected by "-k performance-ci-1" (--timeout 1800).
+  MESH_DEVICE=N150 HF_MODEL=google/gemma-3-1b-it WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml \
+    pytest -n auto models/tt_transformers/demo/simple_text_demo.py -k performance-ci-1 --timeout 1800
+}
+
 run_segformer_func() {
   #Segformer Segmentation Demo
   WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings models/demos/segformer/demo/demo_for_semantic_segmentation.py --timeout 600; fail+=$?