From 6efb698c9e534a061144da29c881291ff11ec870 Mon Sep 17 00:00:00 2001
From: oreomaker <zh002919@outlook.com>
Date: Fri, 23 Jan 2026 19:28:42 +0800
Subject: [PATCH 1/7] chore: remove qwen3 aot mir file

---
 qwen3_qnn_aot.mir | 2331 ---------------------------------------------
 1 file changed, 2331 deletions(-)
 delete mode 100644 qwen3_qnn_aot.mir

diff --git a/qwen3_qnn_aot.mir b/qwen3_qnn_aot.mir
deleted file mode 100644
index 1b8086df..00000000
--- a/qwen3_qnn_aot.mir
+++ /dev/null
@@ -1,2331 +0,0 @@
-@main () -> () {
-    graph.CallGraphOp @model (%9939:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0, solved=1)], %10013:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1, solved=1)], %9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %9941:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)], %9943:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)], %9945:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)], %9947:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)], %9949:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)], %9951:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)], %9953:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)], %9955:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=10, solved=1)], %9957:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)], %9959:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)], %9961:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)], %9963:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)], %9965:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)], %9967:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)], %9969:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)], %9971:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)], %9973:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)], %9975:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)], %9977:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)], %9979:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)], %9981:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)], %9983:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)], %9985:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)], %9987:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)], %9989:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)], %9991:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)], %9993:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)], 
%9995:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)], %9997:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)], %9999:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)], %10001:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)], %10003:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)], %10005:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)], %10007:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)], %10009:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)], %10011:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)], %9942:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1)], %9944:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)], %9946:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)], %9948:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)], %9950:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)], %9952:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)], %9954:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)], %9956:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)], %9958:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)], %9960:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)], %9962:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)], %9964:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)], %9966:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)], %9968:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)], %9970:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)], %9972:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)], %9974:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)], %9976:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)], %9978:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)], %9980:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)], 
%9982:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)], %9984:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)], %9986:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)], %9988:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)], %9990:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)], %9992:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)], %9994:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)], %9996:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)], %9998:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)], %10000:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1)], %10002:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)], %10004:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)], %10006:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)], %10008:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)], %10010:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)], %10012:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)]) -> (%12431:tensor<[1, 1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1449, solved=1)], %10047:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)], %10114:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)], %10181:tensor<[1, 8, 128, 32], 
UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)], %10248:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)], %10315:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)], %10382:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)], %10449:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)], %10516:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)], %10583:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)], %10650:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)], %10717:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)], %10784:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: 
UInt8, scale_type: Float32), uuid=511, solved=1)], %10851:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)], %10918:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)], %10985:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)], %11052:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)], %11119:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)], %11186:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1)], %11253:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)], %11320:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)], %11387:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)], %11454:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)], %11521:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)], %11588:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)], %11655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)], %11722:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)], %11789:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)], %11856:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)], %11923:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)], %11990:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)], %12057:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, 
solved=1)], %12124:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)], %12191:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)], %12258:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)], %12325:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)], %12392:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)], %10049:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, solved=1)], %10116:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1)], %10183:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1)], %10250:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1)], %10317:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1)], %10384:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1)], %10451:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1)], %10518:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1)], %10585:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1)], %10652:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1)], %10719:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1)], %10786:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1)], %10853:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1)], %10920:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, 
solved=1)], %10987:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1)], %11054:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1)], %11121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1)], %11188:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1)], %11255:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1)], %11322:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, solved=1)], %11389:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1)], %11456:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1)], %11523:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1)], %11590:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1)], %11657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1)], %11724:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1)], %11791:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1)], %11858:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1)], %11925:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1)], %11992:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1)], %12059:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1)], %12126:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1)], %12193:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=1311, solved=1)], %12260:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1)], %12327:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1)], %12394:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1)])
-    graph.SubGraphOp @model <CPU> [using_qnn:true, symbol:model] {
-        (%9939:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0, solved=1)], %10013:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1, solved=1)], %9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %9941:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)], %9943:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)], %9945:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)], %9947:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)], %9949:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)], %9951:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)], %9953:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)], %9955:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=10, solved=1)], %9957:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)], %9959:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)], %9961:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)], %9963:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)], %9965:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)], %9967:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)], %9969:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)], %9971:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)], %9973:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)], %9975:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)], %9977:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)], %9979:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)], %9981:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)], %9983:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)], %9985:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)], %9987:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)], %9989:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)], %9991:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)], %9993:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)], %9995:tensor<[1, 8, 128, 992], UInt8PerTensor, 
CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)], %9997:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)], %9999:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)], %10001:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)], %10003:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)], %10005:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)], %10007:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)], %10009:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)], %10011:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)], %9942:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=39, solved=1)], %9944:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)], %9946:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)], %9948:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)], %9950:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)], %9952:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)], %9954:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)], %9956:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)], %9958:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)], %9960:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)], %9962:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)], %9964:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)], %9966:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)], %9968:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)], %9970:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)], %9972:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)], %9974:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)], %9976:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)], %9978:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)], %9980:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)], %9982:tensor<[1, 8, 992, 128], UInt8PerTensor, 
CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)], %9984:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)], %9986:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)], %9988:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)], %9990:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)], %9992:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)], %9994:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)], %9996:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)], %9998:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)], %10000:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=68, solved=1)], %10002:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)], %10004:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)], %10006:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)], %10008:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)], %10010:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)], %10012:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)]) -> (%12431:tensor<[1, 1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1449, solved=1)], %10047:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)], %10114:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)], %10181:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)], %10248:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)], %10315:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)], %10382:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)], %10449:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)], %10516:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)], %10583:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)], %10650:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)], %10717:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)], %10784:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, 
solved=1)], %10851:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)], %10918:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)], %10985:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)], %11052:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)], %11119:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)], %11186:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1)], %11253:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)], %11320:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)], %11387:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)], %11454:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)], %11521:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)], %11588:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)], %11655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)], %11722:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)], %11789:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)], %11856:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)], %11923:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)], %11990:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)], %12057:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, 
solved=1)], %12124:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)], %12191:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)], %12258:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)], %12325:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)], %12392:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)], %10049:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, solved=1)], %10116:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1)], %10183:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1)], %10250:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1)], %10317:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1)], %10384:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1)], %10451:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1)], %10518:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1)], %10585:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1)], %10652:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1)], %10719:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1)], %10786:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1)], %10853:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1)], %10920:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, 
solved=1)], %10987:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1)], %11054:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1)], %11121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1)], %11188:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1)], %11255:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1)], %11322:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, solved=1)], %11389:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1)], %11456:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1)], %11523:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1)], %11590:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1)], %11657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1)], %11724:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1)], %11791:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1)], %11858:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1)], %11925:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1)], %11992:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1)], %12059:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1)], %12126:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1)], %12193:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=1311, solved=1)], %12260:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1)], %12327:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1)], %12394:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1)]) {
-            graph.CallGraphOp @model.0.s32 (%9939:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0, solved=1)], %10013:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1, solved=1)], %9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %9941:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)], %9943:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)], %9945:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)], %9947:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)], %9949:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)], %9951:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)], %9953:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)], %9955:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: 
UInt8, scale_type: Float32), uuid=10, solved=1)], %9957:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)], %9959:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)], %9961:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)], %9963:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)], %9965:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)], %9967:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)], %9969:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)], %9971:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)], %9973:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)], %9975:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)], %9977:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)], %9979:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)], %9981:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)], %9983:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)], %9985:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)], %9987:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)], %9989:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)], %9991:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)], %9993:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)], 
%9995:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)], %9997:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)], %9999:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)], %10001:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)], %10003:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)], %10005:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)], %10007:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)], %10009:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)], %10011:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)], %9942:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 
255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1)], %9944:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)], %9946:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)], %9948:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)], %9950:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)], %9952:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)], %9954:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)], %9956:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)], %9958:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)], %9960:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)], %9962:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)], %9964:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)], %9966:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)], %9968:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)], %9970:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)], %9972:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)], %9974:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)], %9976:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)], %9978:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)], %9980:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)], 
%9982:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)], %9984:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)], %9986:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)], %9988:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)], %9990:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)], %9992:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)], %9994:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)], %9996:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)], %9998:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)], %10000:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, 
quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1)], %10002:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)], %10004:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)], %10006:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)], %10008:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)], %10010:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)], %10012:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)]) -> (%12431:tensor<[1, 1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1449, solved=1)], %10047:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)], %10114:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)], %10181:tensor<[1, 8, 128, 32], 
UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)], %10248:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)], %10315:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)], %10382:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)], %10449:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)], %10516:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)], %10583:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)], %10650:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)], %10717:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)], %10784:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: 
UInt8, scale_type: Float32), uuid=511, solved=1)], %10851:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)], %10918:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)], %10985:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)], %11052:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)], %11119:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)], %11186:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1)], %11253:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)], %11320:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)], %11387:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)], %11454:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)], %11521:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)], %11588:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)], %11655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)], %11722:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)], %11789:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)], %11856:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)], %11923:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)], %11990:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)], %12057:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, 
solved=1)], %12124:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)], %12191:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)], %12258:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)], %12325:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)], %12392:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)], %10049:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, solved=1)], %10116:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1)], %10183:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1)], %10250:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1)], %10317:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1)], %10384:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1)], %10451:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1)], %10518:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1)], %10585:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1)], %10652:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1)], %10719:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1)], %10786:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1)], %10853:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1)], %10920:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, 
solved=1)], %10987:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1)], %11054:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1)], %11121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1)], %11188:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1)], %11255:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1)], %11322:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, solved=1)], %11389:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1)], %11456:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1)], %11523:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1)], %11590:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1)], %11657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1)], %11724:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1)], %11791:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1)], %11858:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1)], %11925:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1)], %11992:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1)], %12059:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1)], %12126:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1)], %12193:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=1311, solved=1)], %12260:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1)], %12327:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1)], %12394:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1)])
-            cf.ReturnOp (%12431:tensor<[1, 1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1449, solved=1)], %10047:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)], %10114:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)], %10181:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)], %10248:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)], %10315:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)], %10382:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)], %10449:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)], %10516:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)], %10583:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)], %10650:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)], %10717:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)], %10784:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1)], %10851:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)], %10918:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)], %10985:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)], %11052:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)], %11119:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)], %11186:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=739, solved=1)], %11253:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)], %11320:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)], %11387:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)], %11454:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)], %11521:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)], %11588:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)], %11655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)], %11722:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)], %11789:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)], %11856:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)], %11923:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)], %11990:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)], %12057:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1)], %12124:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)], %12191:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)], %12258:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)], %12325:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)], %12392:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)], %10049:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, 
solved=1)], %10116:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1)], %10183:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1)], %10250:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1)], %10317:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1)], %10384:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1)], %10451:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1)], %10518:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1)], %10585:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1)], %10652:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1)], %10719:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1)], %10786:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1)], %10853:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1)], %10920:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, solved=1)], %10987:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1)], %11054:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1)], %11121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1)], %11188:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1)], %11255:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1)], %11322:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, 
solved=1)], %11389:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1)], %11456:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1)], %11523:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1)], %11590:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1)], %11657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1)], %11724:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1)], %11791:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1)], %11858:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1)], %11925:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1)], %11992:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1)], %12059:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1)], %12126:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1)], %12193:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1311, solved=1)], %12260:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1)], %12327:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1)], %12394:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1)]) -> ()
-        }
-    }
-    //     ╔═════╗   
-    //    ║  o o  ║  
-    //    ║   ▽   ║  
-    //    ╚═════╝   
-    //     ║   ║     
-    //    ╱╩╦╦╩╲    
-    graph.SubGraphOp @model.0.s32 <notype> [use_qnn:true, symbol:model.0.s32] {
-        (%9939:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0, solved=1)], %10013:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1, solved=1)], %9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %9941:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)], %9943:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)], %9945:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)], %9947:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)], %9949:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)], %9951:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)], %9953:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)], %9955:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=10, solved=1)], %9957:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)], %9959:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)], %9961:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)], %9963:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)], %9965:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)], %9967:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)], %9969:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)], %9971:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)], %9973:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)], %9975:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)], %9977:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)], %9979:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)], %9981:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)], %9983:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)], %9985:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)], %9987:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)], %9989:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)], %9991:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)], %9993:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)], %9995:tensor<[1, 8, 128, 992], UInt8PerTensor, 
CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)], %9997:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)], %9999:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)], %10001:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)], %10003:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)], %10005:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)], %10007:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)], %10009:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)], %10011:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)], %9942:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=39, solved=1)], %9944:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)], %9946:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)], %9948:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)], %9950:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)], %9952:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)], %9954:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)], %9956:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)], %9958:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)], %9960:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)], %9962:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, 
quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)], %9964:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)], %9966:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)], %9968:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)], %9970:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)], %9972:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)], %9974:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)], %9976:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)], %9978:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)], %9980:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)], %9982:tensor<[1, 8, 992, 128], UInt8PerTensor, 
CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)], %9984:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)], %9986:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)], %9988:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)], %9990:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)], %9992:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)], %9994:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)], %9996:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)], %9998:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)], %10000:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=68, solved=1)], %10002:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)], %10004:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)], %10006:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)], %10008:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)], %10010:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)], %10012:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)]) -> (%12431:tensor<[1, 1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1449, solved=1)], %10047:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)], %10114:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)], %10181:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)], %10248:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)], %10315:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)], %10382:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)], %10449:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)], %10516:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)], %10583:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)], %10650:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)], %10717:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)], %10784:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, 
solved=1)], %10851:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)], %10918:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)], %10985:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)], %11052:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)], %11119:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)], %11186:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1)], %11253:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)], %11320:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)], %11387:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)], %11454:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)], %11521:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)], %11588:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)], %11655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)], %11722:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)], %11789:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)], %11856:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)], %11923:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)], %11990:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)], %12057:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, 
solved=1)], %12124:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)], %12191:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)], %12258:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)], %12325:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)], %12392:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)], %10049:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, solved=1)], %10116:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1)], %10183:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1)], %10250:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1)], %10317:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1)], %10384:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1)], %10451:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1)], %10518:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1)], %10585:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1)], %10652:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1)], %10719:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1)], %10786:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1)], %10853:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1)], %10920:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, 
solved=1)], %10987:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1)], %11054:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1)], %11121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1)], %11188:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1)], %11255:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1)], %11322:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, solved=1)], %11389:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1)], %11456:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1)], %11523:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1)], %11590:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1)], %11657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1)], %11724:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1)], %11791:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1)], %11858:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1)], %11925:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1)], %11992:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1)], %12059:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1)], %12126:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1)], %12193:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), 
uuid=1311, solved=1)], %12260:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1)], %12327:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1)], %12394:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1)]) {
-            linalg.CPU.EmbeddingOp <name="model.embed_tokens"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Int32), uuid=0, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)), using_qnn:true] (%9939:tensor<[1, 32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=0, solved=1)]) -> (%10014:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.GatherOp <name="model.Gather.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%4257:tensor<[1, 2048, 128], UInt16PerTensor, CPU>[@model.mllm_max_sin_embedding][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), symbol:model.mllm_max_sin_embedding], %10013:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1, solved=1)]) -> (%10015:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.GatherOp <name="model.Gather.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%5747:tensor<[1, 2048, 128], UInt16PerTensor, CPU>[@model.mllm_max_cos_embedding][quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), symbol:model.mllm_max_cos_embedding], %10013:tensor<[32], Int32, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: Int32), uuid=1, solved=1)]) -> (%10016:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.0.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=79, solved=1)), using_qnn:true] (%10014:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10017:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1), ), using_qnn:true] (%10017:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1)]) -> (%10018:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.0.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=80, solved=1)), using_qnn:true] (%10018:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1)]) -> (%10019:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.0.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=82, solved=1)), using_qnn:true] (%10018:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1)]) -> (%10020:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.0.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=84, solved=1)), using_qnn:true] (%10018:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=78, solved=1)]) -> (%10021:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1), ), using_qnn:true] (%10019:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1)]) -> (%10022:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.0.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1), ), using_qnn:true] (%10022:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1)]) -> (%10023:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1), ), using_qnn:true] (%10020:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1)]) -> (%10024:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.0.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1), ), using_qnn:true] (%10024:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1)]) -> (%10025:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1), ), using_qnn:true] (%10021:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1)]) -> (%10026:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.0.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1), ), using_qnn:true] (%10026:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1)]) -> (%10027:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.0.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=87, solved=1)), using_qnn:true] (%10023:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=81, solved=1)]) -> (%10028:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.0.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=89, solved=1)), using_qnn:true] (%10025:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=83, solved=1)]) -> (%10029:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.0.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), ), using_qnn:true] (%10028:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)]) -> (%10030:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.0.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), ), using_qnn:true] (%10028:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)]) -> (%10031:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.0.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1), ), using_qnn:true] (%10031:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)]) -> (%10032:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.0.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1), ), using_qnn:true] (%10032:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1)], %10030:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)]) -> (%10033:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.0.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1), ), using_qnn:true] (%10033:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10034:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.0.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), ), using_qnn:true] (%10028:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10035:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.0.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), ), using_qnn:true] (%10035:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)], %10034:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=90, solved=1)]) -> (%10036:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.0.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), ), using_qnn:true] (%10029:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)]) -> (%10037:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.0.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), ), using_qnn:true] (%10029:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)]) -> (%10038:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.0.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1), ), using_qnn:true] (%10038:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)]) -> (%10039:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.0.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1), ), using_qnn:true] (%10039:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1)], %10037:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)]) -> (%10040:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.0.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1), ), using_qnn:true] (%10040:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10041:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.0.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), ), using_qnn:true] (%10029:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10042:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.0.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), ), using_qnn:true] (%10042:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)], %10041:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=91, solved=1)]) -> (%10043:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.0.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=92, solved=1), ), using_qnn:true] (%10043:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=88, solved=1)]) -> (%10044:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=92, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.0.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=92, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1), ), using_qnn:true] (%10044:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=92, solved=1)]) -> (%10045:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.0.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1), ), using_qnn:true] (%10045:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)]) -> (%10047:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.0.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=94, solved=1), ), using_qnn:true] (%10027:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=85, solved=1)]) -> (%10048:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=94, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.0.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=94, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, solved=1), ), using_qnn:true] (%10048:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=94, solved=1)]) -> (%10049:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.0.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1), ), using_qnn:true] (%9941:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)], %10047:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)]) -> (%10051:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.0.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1), ), using_qnn:true] (%9942:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1)], %10049:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, solved=1)]) -> (%10052:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.0.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1), ), using_qnn:true] (%10051:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)]) -> (%10053:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.0.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1), ), using_qnn:true] (%10052:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1)]) -> (%10054:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.0.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1), ), using_qnn:true] (%10036:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=86, solved=1)], %10053:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=3, solved=1)]) -> (%10055:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.0.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1), ), using_qnn:true] (%10055:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1)], %10056:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=97, solved=1)]) -> (%10057:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.0.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=1), ), using_qnn:true] (%10057:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1)]) -> (%10058:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.0.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=99, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=1), ), using_qnn:true] (%10058:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=1)], %10059:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=99, solved=1)]) -> (%10060:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.0.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=100, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=101, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10061:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=100, solved=1), constant:[0]]) -> (%10062:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=101, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.0.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=101, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=102, solved=1), ), using_qnn:true] (%10062:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=101, solved=1)], %10057:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=96, solved=1)], %10060:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=98, solved=1)]) -> (%10063:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=102, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.0.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=102, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=103, solved=1), ), using_qnn:true] (%10063:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=102, solved=1)]) -> (%10064:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=103, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.0.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=103, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1), ), using_qnn:true] (%10064:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=103, solved=1)], %10054:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=39, solved=1)]) -> (%10065:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.0.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1), ), using_qnn:true] (%10065:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1)]) -> (%10066:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1), ), using_qnn:true] (%10066:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1)]) -> (%10067:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.0.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=105, solved=1)), using_qnn:true] (%10067:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=104, solved=1)]) -> (%10068:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=1), ), using_qnn:true] (%10068:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=1)]) -> (%10069:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.0.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10014:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10069:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=106, solved=1)]) -> (%10070:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.0.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=108, solved=1)), using_qnn:true] (%10070:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10071:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1), ), using_qnn:true] (%10071:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1)]) -> (%10072:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.0.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=109, solved=1)), using_qnn:true] (%10072:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1)]) -> (%10073:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=1), ), using_qnn:true] (%10073:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=1)]) -> (%10074:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.0.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=111, solved=1)), using_qnn:true] (%10072:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=107, solved=1)]) -> (%10075:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), ), using_qnn:true] (%10075:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)]) -> (%10076:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.0.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=113, solved=1), ), using_qnn:true] (%10076:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)]) -> (%10077:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=113, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.0.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=113, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), ), using_qnn:true] (%10076:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)], %10077:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=113, solved=1)]) -> (%10078:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.0.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), ), using_qnn:true] (%10078:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)], %10074:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=110, solved=1)]) -> (%10079:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), ), using_qnn:true] (%10079:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)]) -> (%10080:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.0.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=115, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=114, solved=1)), using_qnn:true] (%10080:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=112, solved=1)]) -> (%10081:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=115, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.0.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=115, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=115, solved=1), ), using_qnn:true] (%10081:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=115, solved=1)]) -> (%10082:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=115, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.0.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=115, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10070:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10082:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=115, solved=1)]) -> (%10083:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.1.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=117, solved=1)), using_qnn:true] (%10083:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10084:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1), ), using_qnn:true] (%10084:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1)]) -> (%10085:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.1.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=118, solved=1)), using_qnn:true] (%10085:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1)]) -> (%10086:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.1.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=120, solved=1)), using_qnn:true] (%10085:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1)]) -> (%10087:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.1.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=122, solved=1)), using_qnn:true] (%10085:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=116, solved=1)]) -> (%10088:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1), ), using_qnn:true] (%10086:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1)]) -> (%10089:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.1.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1), ), using_qnn:true] (%10089:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1)]) -> (%10090:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1), ), using_qnn:true] (%10087:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1)]) -> (%10091:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.1.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1), ), using_qnn:true] (%10091:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1)]) -> (%10092:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1), ), using_qnn:true] (%10088:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1)]) -> (%10093:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.1.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1), ), using_qnn:true] (%10093:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1)]) -> (%10094:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.1.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=125, solved=1)), using_qnn:true] (%10090:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=119, solved=1)]) -> (%10095:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.1.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=127, solved=1)), using_qnn:true] (%10092:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=121, solved=1)]) -> (%10096:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.1.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), ), using_qnn:true] (%10095:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)]) -> (%10097:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.1.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), ), using_qnn:true] (%10095:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)]) -> (%10098:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.1.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1), ), using_qnn:true] (%10098:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)]) -> (%10099:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.1.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1), ), using_qnn:true] (%10099:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1)], %10097:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)]) -> (%10100:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.1.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1), ), using_qnn:true] (%10100:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10101:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.1.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), ), using_qnn:true] (%10095:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10102:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.1.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), ), using_qnn:true] (%10102:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)], %10101:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=128, solved=1)]) -> (%10103:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.1.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), ), using_qnn:true] (%10096:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)]) -> (%10104:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.1.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), ), using_qnn:true] (%10096:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)]) -> (%10105:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.1.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1), ), using_qnn:true] (%10105:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)]) -> (%10106:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.1.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1), ), using_qnn:true] (%10106:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1)], %10104:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)]) -> (%10107:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.1.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1), ), using_qnn:true] (%10107:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10108:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.1.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), ), using_qnn:true] (%10096:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.1.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), ), using_qnn:true] (%10109:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)], %10108:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=129, solved=1)]) -> (%10110:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.1.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=130, solved=1), ), using_qnn:true] (%10110:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=126, solved=1)]) -> (%10111:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=130, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.1.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=130, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1), ), using_qnn:true] (%10111:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=130, solved=1)]) -> (%10112:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.1.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1), ), using_qnn:true] (%10112:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)]) -> (%10114:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.1.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=132, solved=1), ), using_qnn:true] (%10094:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=123, solved=1)]) -> (%10115:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=132, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.1.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=132, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1), ), using_qnn:true] (%10115:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=132, solved=1)]) -> (%10116:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.1.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1), ), using_qnn:true] (%9943:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)], %10114:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)]) -> (%10118:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.1.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1), ), using_qnn:true] (%9944:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)], %10116:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1)]) -> (%10119:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.1.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1), ), using_qnn:true] (%10118:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)]) -> (%10120:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.1.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1), ), using_qnn:true] (%10119:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)]) -> (%10121:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.1.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1), ), using_qnn:true] (%10103:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=124, solved=1)], %10120:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=4, solved=1)]) -> (%10122:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.1.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1), ), using_qnn:true] (%10122:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1)], %10123:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=135, solved=1)]) -> (%10124:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.1.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=1), ), using_qnn:true] (%10124:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1)]) -> (%10125:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.1.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=137, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=1), ), using_qnn:true] (%10125:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=1)], %10126:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=137, solved=1)]) -> (%10127:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.1.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=138, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=139, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10128:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=138, solved=1), constant:[0]]) -> (%10129:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=139, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.1.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=139, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=140, solved=1), ), using_qnn:true] (%10129:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=139, solved=1)], %10124:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=134, solved=1)], %10127:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=136, solved=1)]) -> (%10130:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=140, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.1.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=140, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=1), ), using_qnn:true] (%10130:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=140, solved=1)]) -> (%10131:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.1.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1), ), using_qnn:true] (%10131:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=141, solved=1)], %10121:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=40, solved=1)]) -> (%10132:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.1.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1), ), using_qnn:true] (%10132:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1)]) -> (%10133:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1), ), using_qnn:true] (%10133:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1)]) -> (%10134:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.1.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=143, solved=1)), using_qnn:true] (%10134:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=142, solved=1)]) -> (%10135:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=1), ), using_qnn:true] (%10135:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=1)]) -> (%10136:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.1.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10083:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10136:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=144, solved=1)]) -> (%10137:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.1.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=146, solved=1)), using_qnn:true] (%10137:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10138:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1), ), using_qnn:true] (%10138:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1)]) -> (%10139:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.1.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=148, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=147, solved=1)), using_qnn:true] (%10139:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1)]) -> (%10140:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=148, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=148, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=148, solved=1), ), using_qnn:true] (%10140:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=148, solved=1)]) -> (%10141:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=148, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.1.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=149, solved=1)), using_qnn:true] (%10139:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=145, solved=1)]) -> (%10142:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), ), using_qnn:true] (%10142:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)]) -> (%10143:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.1.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=151, solved=1), ), using_qnn:true] (%10143:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)]) -> (%10144:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=151, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.1.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=151, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), ), using_qnn:true] (%10143:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)], %10144:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=151, solved=1)]) -> (%10145:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.1.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=148, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), ), using_qnn:true] (%10145:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)], %10141:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=148, solved=1)]) -> (%10146:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), ), using_qnn:true] (%10146:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)]) -> (%10147:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.1.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=152, solved=1)), using_qnn:true] (%10147:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=150, solved=1)]) -> (%10148:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.1.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=1), ), using_qnn:true] (%10148:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=1)]) -> (%10149:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.1.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10137:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10149:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=153, solved=1)]) -> (%10150:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.2.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=155, solved=1)), using_qnn:true] (%10150:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10151:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1), ), using_qnn:true] (%10151:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1)]) -> (%10152:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.2.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=156, solved=1)), using_qnn:true] (%10152:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1)]) -> (%10153:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.2.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=158, solved=1)), using_qnn:true] (%10152:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1)]) -> (%10154:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.2.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=160, solved=1)), using_qnn:true] (%10152:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=154, solved=1)]) -> (%10155:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1), ), using_qnn:true] (%10153:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1)]) -> (%10156:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.2.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1), ), using_qnn:true] (%10156:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1)]) -> (%10157:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1), ), using_qnn:true] (%10154:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1)]) -> (%10158:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.2.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1), ), using_qnn:true] (%10158:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1)]) -> (%10159:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1), ), using_qnn:true] (%10155:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1)]) -> (%10160:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.2.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1), ), using_qnn:true] (%10160:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1)]) -> (%10161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.2.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=163, solved=1)), using_qnn:true] (%10157:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=157, solved=1)]) -> (%10162:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.2.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=165, solved=1)), using_qnn:true] (%10159:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=159, solved=1)]) -> (%10163:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.2.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), ), using_qnn:true] (%10162:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)]) -> (%10164:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.2.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), ), using_qnn:true] (%10162:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)]) -> (%10165:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.2.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1), ), using_qnn:true] (%10165:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)]) -> (%10166:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.2.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1), ), using_qnn:true] (%10166:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1)], %10164:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)]) -> (%10167:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.2.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1), ), using_qnn:true] (%10167:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10168:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.2.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), ), using_qnn:true] (%10162:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10169:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.2.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), ), using_qnn:true] (%10169:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)], %10168:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=166, solved=1)]) -> (%10170:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.2.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), ), using_qnn:true] (%10163:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)]) -> (%10171:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.2.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), ), using_qnn:true] (%10163:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)]) -> (%10172:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.2.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1), ), using_qnn:true] (%10172:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)]) -> (%10173:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.2.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1), ), using_qnn:true] (%10173:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1)], %10171:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)]) -> (%10174:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.2.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1), ), using_qnn:true] (%10174:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10175:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.2.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), ), using_qnn:true] (%10163:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10176:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.2.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), ), using_qnn:true] (%10176:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)], %10175:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=167, solved=1)]) -> (%10177:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.2.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=168, solved=1), ), using_qnn:true] (%10177:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=164, solved=1)]) -> (%10178:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=168, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.2.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=168, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1), ), using_qnn:true] (%10178:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=168, solved=1)]) -> (%10179:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.2.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1), ), using_qnn:true] (%10179:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)]) -> (%10181:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.2.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=170, solved=1), ), using_qnn:true] (%10161:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=161, solved=1)]) -> (%10182:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=170, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.2.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=170, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1), ), using_qnn:true] (%10182:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=170, solved=1)]) -> (%10183:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.2.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1), ), using_qnn:true] (%9945:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)], %10181:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)]) -> (%10185:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.2.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1), ), using_qnn:true] (%9946:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)], %10183:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1)]) -> (%10186:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.2.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1), ), using_qnn:true] (%10185:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)]) -> (%10187:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.2.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1), ), using_qnn:true] (%10186:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)]) -> (%10188:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.2.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1), ), using_qnn:true] (%10170:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=162, solved=1)], %10187:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=5, solved=1)]) -> (%10189:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.2.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=173, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1), ), using_qnn:true] (%10189:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1)], %10190:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=173, solved=1)]) -> (%10191:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.2.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=1), ), using_qnn:true] (%10191:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1)]) -> (%10192:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.2.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=175, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=1), ), using_qnn:true] (%10192:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=1)], %10193:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=175, solved=1)]) -> (%10194:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.2.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=176, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=177, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10195:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=176, solved=1), constant:[0]]) -> (%10196:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=177, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.2.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=177, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=1), ), using_qnn:true] (%10196:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=177, solved=1)], %10191:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=172, solved=1)], %10194:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=174, solved=1)]) -> (%10197:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.2.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=1), ), using_qnn:true] (%10197:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=178, solved=1)]) -> (%10198:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.2.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1), ), using_qnn:true] (%10198:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=179, solved=1)], %10188:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=41, solved=1)]) -> (%10199:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.2.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1), ), using_qnn:true] (%10199:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1)]) -> (%10200:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1), ), using_qnn:true] (%10200:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1)]) -> (%10201:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.2.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=181, solved=1)), using_qnn:true] (%10201:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=180, solved=1)]) -> (%10202:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=1), ), using_qnn:true] (%10202:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=1)]) -> (%10203:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.2.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10150:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10203:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=182, solved=1)]) -> (%10204:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.2.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=184, solved=1)), using_qnn:true] (%10204:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10205:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1), ), using_qnn:true] (%10205:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1)]) -> (%10206:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.2.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=186, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=185, solved=1)), using_qnn:true] (%10206:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1)]) -> (%10207:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=186, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=186, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=186, solved=1), ), using_qnn:true] (%10207:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=186, solved=1)]) -> (%10208:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=186, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.2.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=187, solved=1)), using_qnn:true] (%10206:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=183, solved=1)]) -> (%10209:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), ), using_qnn:true] (%10209:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)]) -> (%10210:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.2.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=1), ), using_qnn:true] (%10210:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)]) -> (%10211:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.2.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), ), using_qnn:true] (%10210:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)], %10211:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=189, solved=1)]) -> (%10212:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.2.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=186, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), ), using_qnn:true] (%10212:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)], %10208:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=186, solved=1)]) -> (%10213:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), ), using_qnn:true] (%10213:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)]) -> (%10214:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.2.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=191, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=190, solved=1)), using_qnn:true] (%10214:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=188, solved=1)]) -> (%10215:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=191, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.2.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=191, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=191, solved=1), ), using_qnn:true] (%10215:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=191, solved=1)]) -> (%10216:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=191, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.2.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=191, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10204:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10216:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=191, solved=1)]) -> (%10217:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.3.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=193, solved=1)), using_qnn:true] (%10217:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10218:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1), ), using_qnn:true] (%10218:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1)]) -> (%10219:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.3.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=194, solved=1)), using_qnn:true] (%10219:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1)]) -> (%10220:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.3.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=196, solved=1)), using_qnn:true] (%10219:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1)]) -> (%10221:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.3.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=198, solved=1)), using_qnn:true] (%10219:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=192, solved=1)]) -> (%10222:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1), ), using_qnn:true] (%10220:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1)]) -> (%10223:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.3.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1), ), using_qnn:true] (%10223:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1)]) -> (%10224:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1), ), using_qnn:true] (%10221:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1)]) -> (%10225:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.3.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1), ), using_qnn:true] (%10225:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1)]) -> (%10226:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1), ), using_qnn:true] (%10222:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1)]) -> (%10227:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.3.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1), ), using_qnn:true] (%10227:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1)]) -> (%10228:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.3.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=201, solved=1)), using_qnn:true] (%10224:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=195, solved=1)]) -> (%10229:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.3.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=203, solved=1)), using_qnn:true] (%10226:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=197, solved=1)]) -> (%10230:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.3.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), ), using_qnn:true] (%10229:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)]) -> (%10231:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.3.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), ), using_qnn:true] (%10229:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)]) -> (%10232:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.3.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1), ), using_qnn:true] (%10232:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)]) -> (%10233:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.3.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1), ), using_qnn:true] (%10233:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1)], %10231:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)]) -> (%10234:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.3.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1), ), using_qnn:true] (%10234:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10235:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.3.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), ), using_qnn:true] (%10229:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10236:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.3.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), ), using_qnn:true] (%10236:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)], %10235:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=204, solved=1)]) -> (%10237:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.3.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), ), using_qnn:true] (%10230:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)]) -> (%10238:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.3.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), ), using_qnn:true] (%10230:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)]) -> (%10239:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.3.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1), ), using_qnn:true] (%10239:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)]) -> (%10240:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.3.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1), ), using_qnn:true] (%10240:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1)], %10238:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)]) -> (%10241:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.3.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1), ), using_qnn:true] (%10241:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10242:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.3.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), ), using_qnn:true] (%10230:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10243:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.3.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), ), using_qnn:true] (%10243:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)], %10242:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=205, solved=1)]) -> (%10244:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.3.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=206, solved=1), ), using_qnn:true] (%10244:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=202, solved=1)]) -> (%10245:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=206, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.3.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=206, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1), ), using_qnn:true] (%10245:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=206, solved=1)]) -> (%10246:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.3.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1), ), using_qnn:true] (%10246:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)]) -> (%10248:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.3.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=208, solved=1), ), using_qnn:true] (%10228:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=199, solved=1)]) -> (%10249:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=208, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.3.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=208, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1), ), using_qnn:true] (%10249:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=208, solved=1)]) -> (%10250:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.3.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1), ), using_qnn:true] (%9947:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)], %10248:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)]) -> (%10252:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.3.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1), ), using_qnn:true] (%9948:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)], %10250:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1)]) -> (%10253:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.3.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1), ), using_qnn:true] (%10252:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)]) -> (%10254:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.3.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1), ), using_qnn:true] (%10253:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)]) -> (%10255:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.3.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1), ), using_qnn:true] (%10237:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=200, solved=1)], %10254:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=6, solved=1)]) -> (%10256:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.3.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1), ), using_qnn:true] (%10256:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1)], %10257:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=211, solved=1)]) -> (%10258:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.3.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=212, solved=1), ), using_qnn:true] (%10258:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1)]) -> (%10259:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=212, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.3.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=212, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=212, solved=1), ), using_qnn:true] (%10259:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=212, solved=1)], %10260:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=213, solved=1)]) -> (%10261:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=212, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.3.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=214, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=215, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10262:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=214, solved=1), constant:[0]]) -> (%10263:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=215, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.3.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=215, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=212, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=1), ), using_qnn:true] (%10263:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=215, solved=1)], %10258:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=210, solved=1)], %10261:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=212, solved=1)]) -> (%10264:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.3.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=217, solved=1), ), using_qnn:true] (%10264:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=216, solved=1)]) -> (%10265:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=217, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.3.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=217, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1), ), using_qnn:true] (%10265:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=217, solved=1)], %10255:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=42, solved=1)]) -> (%10266:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.3.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1), ), using_qnn:true] (%10266:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1)]) -> (%10267:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1), ), using_qnn:true] (%10267:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1)]) -> (%10268:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.3.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=220, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=219, solved=1)), using_qnn:true] (%10268:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=218, solved=1)]) -> (%10269:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=220, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=220, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=220, solved=1), ), using_qnn:true] (%10269:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=220, solved=1)]) -> (%10270:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=220, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.3.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=220, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10217:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10270:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=220, solved=1)]) -> (%10271:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.3.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=222, solved=1)), using_qnn:true] (%10271:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10272:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1), ), using_qnn:true] (%10272:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1)]) -> (%10273:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.3.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=223, solved=1)), using_qnn:true] (%10273:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1)]) -> (%10274:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=1), ), using_qnn:true] (%10274:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=1)]) -> (%10275:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.3.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=225, solved=1)), using_qnn:true] (%10273:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=221, solved=1)]) -> (%10276:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), ), using_qnn:true] (%10276:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)]) -> (%10277:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.3.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=227, solved=1), ), using_qnn:true] (%10277:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)]) -> (%10278:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=227, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.3.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=227, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), ), using_qnn:true] (%10277:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)], %10278:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=227, solved=1)]) -> (%10279:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.3.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), ), using_qnn:true] (%10279:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)], %10275:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=224, solved=1)]) -> (%10280:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), ), using_qnn:true] (%10280:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)]) -> (%10281:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.3.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=228, solved=1)), using_qnn:true] (%10281:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=226, solved=1)]) -> (%10282:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.3.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=1), ), using_qnn:true] (%10282:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=1)]) -> (%10283:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.3.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10271:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10283:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=229, solved=1)]) -> (%10284:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.4.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=231, solved=1)), using_qnn:true] (%10284:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10285:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1), ), using_qnn:true] (%10285:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1)]) -> (%10286:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.4.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=232, solved=1)), using_qnn:true] (%10286:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1)]) -> (%10287:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.4.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=234, solved=1)), using_qnn:true] (%10286:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1)]) -> (%10288:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.4.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=236, solved=1)), using_qnn:true] (%10286:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=230, solved=1)]) -> (%10289:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1), ), using_qnn:true] (%10287:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1)]) -> (%10290:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.4.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1), ), using_qnn:true] (%10290:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1)]) -> (%10291:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1), ), using_qnn:true] (%10288:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1)]) -> (%10292:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.4.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1), ), using_qnn:true] (%10292:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1)]) -> (%10293:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1), ), using_qnn:true] (%10289:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1)]) -> (%10294:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.4.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1), ), using_qnn:true] (%10294:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1)]) -> (%10295:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.4.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=239, solved=1)), using_qnn:true] (%10291:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=233, solved=1)]) -> (%10296:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.4.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=241, solved=1)), using_qnn:true] (%10293:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=235, solved=1)]) -> (%10297:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.4.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), ), using_qnn:true] (%10296:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)]) -> (%10298:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.4.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), ), using_qnn:true] (%10296:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)]) -> (%10299:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.4.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1), ), using_qnn:true] (%10299:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)]) -> (%10300:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.4.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1), ), using_qnn:true] (%10300:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1)], %10298:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)]) -> (%10301:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.4.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1), ), using_qnn:true] (%10301:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10302:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.4.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), ), using_qnn:true] (%10296:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10303:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.4.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), ), using_qnn:true] (%10303:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)], %10302:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=242, solved=1)]) -> (%10304:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.4.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), ), using_qnn:true] (%10297:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)]) -> (%10305:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.4.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), ), using_qnn:true] (%10297:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)]) -> (%10306:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.4.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1), ), using_qnn:true] (%10306:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)]) -> (%10307:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.4.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1), ), using_qnn:true] (%10307:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1)], %10305:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)]) -> (%10308:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.4.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1), ), using_qnn:true] (%10308:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10309:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.4.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), ), using_qnn:true] (%10297:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10310:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.4.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), ), using_qnn:true] (%10310:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)], %10309:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=243, solved=1)]) -> (%10311:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.4.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=244, solved=1), ), using_qnn:true] (%10311:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=240, solved=1)]) -> (%10312:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=244, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.4.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=244, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1), ), using_qnn:true] (%10312:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=244, solved=1)]) -> (%10313:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.4.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1), ), using_qnn:true] (%10313:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)]) -> (%10315:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.4.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=246, solved=1), ), using_qnn:true] (%10295:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=237, solved=1)]) -> (%10316:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=246, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.4.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=246, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1), ), using_qnn:true] (%10316:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=246, solved=1)]) -> (%10317:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.4.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1), ), using_qnn:true] (%9949:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)], %10315:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)]) -> (%10319:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.4.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1), ), using_qnn:true] (%9950:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)], %10317:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1)]) -> (%10320:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.4.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1), ), using_qnn:true] (%10319:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)]) -> (%10321:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.4.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1), ), using_qnn:true] (%10320:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)]) -> (%10322:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.4.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1), ), using_qnn:true] (%10304:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=238, solved=1)], %10321:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=7, solved=1)]) -> (%10323:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.4.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1), ), using_qnn:true] (%10323:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1)], %10324:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=249, solved=1)]) -> (%10325:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.4.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=1), ), using_qnn:true] (%10325:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1)]) -> (%10326:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.4.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=1), ), using_qnn:true] (%10326:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=1)], %10327:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=251, solved=1)]) -> (%10328:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.4.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=252, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=253, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10329:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=252, solved=1), constant:[0]]) -> (%10330:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=253, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.4.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=253, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=254, solved=1), ), using_qnn:true] (%10330:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=253, solved=1)], %10325:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=248, solved=1)], %10328:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=250, solved=1)]) -> (%10331:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=254, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.4.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=254, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=255, solved=1), ), using_qnn:true] (%10331:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=254, solved=1)]) -> (%10332:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=255, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.4.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=255, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1), ), using_qnn:true] (%10332:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=255, solved=1)], %10322:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=43, solved=1)]) -> (%10333:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.4.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1), ), using_qnn:true] (%10333:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1)]) -> (%10334:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1), ), using_qnn:true] (%10334:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1)]) -> (%10335:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.4.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=257, solved=1)), using_qnn:true] (%10335:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=256, solved=1)]) -> (%10336:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=1), ), using_qnn:true] (%10336:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=1)]) -> (%10337:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.4.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10284:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10337:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=258, solved=1)]) -> (%10338:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.4.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=260, solved=1)), using_qnn:true] (%10338:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10339:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1), ), using_qnn:true] (%10339:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1)]) -> (%10340:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.4.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=262, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=261, solved=1)), using_qnn:true] (%10340:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1)]) -> (%10341:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=262, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=262, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=262, solved=1), ), using_qnn:true] (%10341:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=262, solved=1)]) -> (%10342:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=262, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.4.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=263, solved=1)), using_qnn:true] (%10340:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=259, solved=1)]) -> (%10343:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), ), using_qnn:true] (%10343:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)]) -> (%10344:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.4.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=265, solved=1), ), using_qnn:true] (%10344:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)]) -> (%10345:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=265, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.4.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=265, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), ), using_qnn:true] (%10344:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)], %10345:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=265, solved=1)]) -> (%10346:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.4.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=262, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), ), using_qnn:true] (%10346:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)], %10342:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=262, solved=1)]) -> (%10347:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), ), using_qnn:true] (%10347:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)]) -> (%10348:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.4.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=266, solved=1)), using_qnn:true] (%10348:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=264, solved=1)]) -> (%10349:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.4.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=1), ), using_qnn:true] (%10349:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=1)]) -> (%10350:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.4.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10338:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10350:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=267, solved=1)]) -> (%10351:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.5.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=269, solved=1)), using_qnn:true] (%10351:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10352:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1), ), using_qnn:true] (%10352:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1)]) -> (%10353:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.5.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=270, solved=1)), using_qnn:true] (%10353:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1)]) -> (%10354:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.5.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=272, solved=1)), using_qnn:true] (%10353:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1)]) -> (%10355:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.5.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=274, solved=1)), using_qnn:true] (%10353:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=268, solved=1)]) -> (%10356:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1), ), using_qnn:true] (%10354:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1)]) -> (%10357:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.5.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1), ), using_qnn:true] (%10357:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1)]) -> (%10358:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1), ), using_qnn:true] (%10355:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1)]) -> (%10359:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.5.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1), ), using_qnn:true] (%10359:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1)]) -> (%10360:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1), ), using_qnn:true] (%10356:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1)]) -> (%10361:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.5.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1), ), using_qnn:true] (%10361:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1)]) -> (%10362:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.5.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=277, solved=1)), using_qnn:true] (%10358:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=271, solved=1)]) -> (%10363:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.5.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=279, solved=1)), using_qnn:true] (%10360:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=273, solved=1)]) -> (%10364:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.5.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), ), using_qnn:true] (%10363:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)]) -> (%10365:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.5.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), ), using_qnn:true] (%10363:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)]) -> (%10366:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.5.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1), ), using_qnn:true] (%10366:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)]) -> (%10367:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.5.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1), ), using_qnn:true] (%10367:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1)], %10365:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)]) -> (%10368:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.5.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1), ), using_qnn:true] (%10368:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10369:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.5.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), ), using_qnn:true] (%10363:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10370:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.5.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), ), using_qnn:true] (%10370:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)], %10369:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=280, solved=1)]) -> (%10371:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.5.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), ), using_qnn:true] (%10364:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)]) -> (%10372:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.5.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), ), using_qnn:true] (%10364:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)]) -> (%10373:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.5.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1), ), using_qnn:true] (%10373:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)]) -> (%10374:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.5.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1), ), using_qnn:true] (%10374:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1)], %10372:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)]) -> (%10375:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.5.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1), ), using_qnn:true] (%10375:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10376:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.5.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), ), using_qnn:true] (%10364:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10377:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.5.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), ), using_qnn:true] (%10377:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)], %10376:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=281, solved=1)]) -> (%10378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.5.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=282, solved=1), ), using_qnn:true] (%10378:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=278, solved=1)]) -> (%10379:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=282, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.5.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=282, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1), ), using_qnn:true] (%10379:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=282, solved=1)]) -> (%10380:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.5.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1), ), using_qnn:true] (%10380:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)]) -> (%10382:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.5.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=284, solved=1), ), using_qnn:true] (%10362:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=275, solved=1)]) -> (%10383:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=284, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.5.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=284, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1), ), using_qnn:true] (%10383:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=284, solved=1)]) -> (%10384:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.5.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1), ), using_qnn:true] (%9951:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)], %10382:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)]) -> (%10386:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.5.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1), ), using_qnn:true] (%9952:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)], %10384:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1)]) -> (%10387:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.5.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1), ), using_qnn:true] (%10386:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)]) -> (%10388:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.5.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1), ), using_qnn:true] (%10387:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)]) -> (%10389:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.5.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1), ), using_qnn:true] (%10371:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=276, solved=1)], %10388:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=8, solved=1)]) -> (%10390:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.5.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=287, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1), ), using_qnn:true] (%10390:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1)], %10391:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=287, solved=1)]) -> (%10392:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.5.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=288, solved=1), ), using_qnn:true] (%10392:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1)]) -> (%10393:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=288, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.5.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=288, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=289, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=288, solved=1), ), using_qnn:true] (%10393:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=288, solved=1)], %10394:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=289, solved=1)]) -> (%10395:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=288, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.5.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=290, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=291, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10396:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=290, solved=1), constant:[0]]) -> (%10397:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=291, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.5.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=291, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=288, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=1), ), using_qnn:true] (%10397:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=291, solved=1)], %10392:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=286, solved=1)], %10395:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=288, solved=1)]) -> (%10398:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.5.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=293, solved=1), ), using_qnn:true] (%10398:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=292, solved=1)]) -> (%10399:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=293, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.5.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=293, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1), ), using_qnn:true] (%10399:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=293, solved=1)], %10389:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=44, solved=1)]) -> (%10400:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.5.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1), ), using_qnn:true] (%10400:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1)]) -> (%10401:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1), ), using_qnn:true] (%10401:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1)]) -> (%10402:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.5.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=296, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=295, solved=1)), using_qnn:true] (%10402:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=294, solved=1)]) -> (%10403:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=296, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=296, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=296, solved=1), ), using_qnn:true] (%10403:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=296, solved=1)]) -> (%10404:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=296, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.5.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=296, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10351:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10404:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=296, solved=1)]) -> (%10405:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.5.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=298, solved=1)), using_qnn:true] (%10405:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10406:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1), ), using_qnn:true] (%10406:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1)]) -> (%10407:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.5.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=300, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=299, solved=1)), using_qnn:true] (%10407:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1)]) -> (%10408:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=300, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=300, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=300, solved=1), ), using_qnn:true] (%10408:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=300, solved=1)]) -> (%10409:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=300, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.5.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=301, solved=1)), using_qnn:true] (%10407:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=297, solved=1)]) -> (%10410:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), ), using_qnn:true] (%10410:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)]) -> (%10411:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.5.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=303, solved=1), ), using_qnn:true] (%10411:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)]) -> (%10412:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=303, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.5.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=303, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), ), using_qnn:true] (%10411:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)], %10412:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=303, solved=1)]) -> (%10413:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.5.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=300, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), ), using_qnn:true] (%10413:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)], %10409:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=300, solved=1)]) -> (%10414:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), ), using_qnn:true] (%10414:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)]) -> (%10415:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.5.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=304, solved=1)), using_qnn:true] (%10415:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=302, solved=1)]) -> (%10416:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.5.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=1), ), using_qnn:true] (%10416:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=1)]) -> (%10417:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.5.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10405:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10417:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=305, solved=1)]) -> (%10418:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.6.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=307, solved=1)), using_qnn:true] (%10418:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10419:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1), ), using_qnn:true] (%10419:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1)]) -> (%10420:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.6.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=308, solved=1)), using_qnn:true] (%10420:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1)]) -> (%10421:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.6.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=310, solved=1)), using_qnn:true] (%10420:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1)]) -> (%10422:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.6.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=312, solved=1)), using_qnn:true] (%10420:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=306, solved=1)]) -> (%10423:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1), ), using_qnn:true] (%10421:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1)]) -> (%10424:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.6.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1), ), using_qnn:true] (%10424:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1)]) -> (%10425:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1), ), using_qnn:true] (%10422:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1)]) -> (%10426:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.6.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1), ), using_qnn:true] (%10426:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1)]) -> (%10427:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1), ), using_qnn:true] (%10423:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1)]) -> (%10428:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.6.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1), ), using_qnn:true] (%10428:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1)]) -> (%10429:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.6.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=315, solved=1)), using_qnn:true] (%10425:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=309, solved=1)]) -> (%10430:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.6.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=317, solved=1)), using_qnn:true] (%10427:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=311, solved=1)]) -> (%10431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.6.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), ), using_qnn:true] (%10430:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)]) -> (%10432:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.6.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), ), using_qnn:true] (%10430:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)]) -> (%10433:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.6.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1), ), using_qnn:true] (%10433:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)]) -> (%10434:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.6.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1), ), using_qnn:true] (%10434:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1)], %10432:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)]) -> (%10435:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.6.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1), ), using_qnn:true] (%10435:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10436:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.6.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), ), using_qnn:true] (%10430:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10437:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.6.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), ), using_qnn:true] (%10437:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)], %10436:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=318, solved=1)]) -> (%10438:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.6.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), ), using_qnn:true] (%10431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)]) -> (%10439:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.6.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), ), using_qnn:true] (%10431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)]) -> (%10440:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.6.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1), ), using_qnn:true] (%10440:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)]) -> (%10441:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.6.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1), ), using_qnn:true] (%10441:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1)], %10439:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)]) -> (%10442:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.6.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1), ), using_qnn:true] (%10442:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10443:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.6.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), ), using_qnn:true] (%10431:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10444:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.6.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), ), using_qnn:true] (%10444:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)], %10443:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=319, solved=1)]) -> (%10445:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.6.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=320, solved=1), ), using_qnn:true] (%10445:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=316, solved=1)]) -> (%10446:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=320, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.6.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=320, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1), ), using_qnn:true] (%10446:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=320, solved=1)]) -> (%10447:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.6.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1), ), using_qnn:true] (%10447:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)]) -> (%10449:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.6.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=322, solved=1), ), using_qnn:true] (%10429:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=313, solved=1)]) -> (%10450:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=322, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.6.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=322, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1), ), using_qnn:true] (%10450:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=322, solved=1)]) -> (%10451:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.6.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1), ), using_qnn:true] (%9953:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)], %10449:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)]) -> (%10453:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.6.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1), ), using_qnn:true] (%9954:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)], %10451:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1)]) -> (%10454:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.6.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1), ), using_qnn:true] (%10453:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)]) -> (%10455:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.6.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1), ), using_qnn:true] (%10454:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)]) -> (%10456:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.6.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1), ), using_qnn:true] (%10438:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=314, solved=1)], %10455:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=9, solved=1)]) -> (%10457:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.6.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=325, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1), ), using_qnn:true] (%10457:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1)], %10458:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=325, solved=1)]) -> (%10459:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.6.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=326, solved=1), ), using_qnn:true] (%10459:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1)]) -> (%10460:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=326, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.6.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=326, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=326, solved=1), ), using_qnn:true] (%10460:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=326, solved=1)], %10461:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=327, solved=1)]) -> (%10462:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=326, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.6.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=328, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=329, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10463:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=328, solved=1), constant:[0]]) -> (%10464:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=329, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.6.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=329, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=326, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=330, solved=1), ), using_qnn:true] (%10464:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=329, solved=1)], %10459:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=324, solved=1)], %10462:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=326, solved=1)]) -> (%10465:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=330, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.6.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=330, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=331, solved=1), ), using_qnn:true] (%10465:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=330, solved=1)]) -> (%10466:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=331, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.6.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=331, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1), ), using_qnn:true] (%10466:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=331, solved=1)], %10456:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=45, solved=1)]) -> (%10467:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.6.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1), ), using_qnn:true] (%10467:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1)]) -> (%10468:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1), ), using_qnn:true] (%10468:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1)]) -> (%10469:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.6.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=333, solved=1)), using_qnn:true] (%10469:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=332, solved=1)]) -> (%10470:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=1), ), using_qnn:true] (%10470:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=1)]) -> (%10471:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.6.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10418:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10471:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=334, solved=1)]) -> (%10472:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.6.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=336, solved=1)), using_qnn:true] (%10472:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10473:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1), ), using_qnn:true] (%10473:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1)]) -> (%10474:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.6.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=337, solved=1)), using_qnn:true] (%10474:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1)]) -> (%10475:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=1), ), using_qnn:true] (%10475:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=1)]) -> (%10476:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.6.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=339, solved=1)), using_qnn:true] (%10474:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=335, solved=1)]) -> (%10477:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), ), using_qnn:true] (%10477:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)]) -> (%10478:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.6.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=341, solved=1), ), using_qnn:true] (%10478:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)]) -> (%10479:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=341, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.6.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=341, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), ), using_qnn:true] (%10478:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)], %10479:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=341, solved=1)]) -> (%10480:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.6.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), ), using_qnn:true] (%10480:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)], %10476:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=338, solved=1)]) -> (%10481:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), ), using_qnn:true] (%10481:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)]) -> (%10482:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.6.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=342, solved=1)), using_qnn:true] (%10482:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=340, solved=1)]) -> (%10483:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.6.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=1), ), using_qnn:true] (%10483:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=1)]) -> (%10484:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.6.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10472:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10484:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=343, solved=1)]) -> (%10485:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.7.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=345, solved=1)), using_qnn:true] (%10485:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10486:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1), ), using_qnn:true] (%10486:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1)]) -> (%10487:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.7.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=346, solved=1)), using_qnn:true] (%10487:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1)]) -> (%10488:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.7.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=348, solved=1)), using_qnn:true] (%10487:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1)]) -> (%10489:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.7.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=350, solved=1)), using_qnn:true] (%10487:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=344, solved=1)]) -> (%10490:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1), ), using_qnn:true] (%10488:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1)]) -> (%10491:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.7.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1), ), using_qnn:true] (%10491:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1)]) -> (%10492:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1), ), using_qnn:true] (%10489:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1)]) -> (%10493:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.7.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1), ), using_qnn:true] (%10493:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1)]) -> (%10494:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1), ), using_qnn:true] (%10490:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1)]) -> (%10495:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.7.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1), ), using_qnn:true] (%10495:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1)]) -> (%10496:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.7.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=353, solved=1)), using_qnn:true] (%10492:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=347, solved=1)]) -> (%10497:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.7.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=355, solved=1)), using_qnn:true] (%10494:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=349, solved=1)]) -> (%10498:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.7.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), ), using_qnn:true] (%10497:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)]) -> (%10499:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.7.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), ), using_qnn:true] (%10497:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)]) -> (%10500:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.7.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1), ), using_qnn:true] (%10500:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)]) -> (%10501:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.7.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1), ), using_qnn:true] (%10501:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1)], %10499:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)]) -> (%10502:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.7.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1), ), using_qnn:true] (%10502:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10503:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.7.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), ), using_qnn:true] (%10497:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10504:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.7.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), ), using_qnn:true] (%10504:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)], %10503:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=356, solved=1)]) -> (%10505:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.7.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), ), using_qnn:true] (%10498:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)]) -> (%10506:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.7.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), ), using_qnn:true] (%10498:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)]) -> (%10507:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.7.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1), ), using_qnn:true] (%10507:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)]) -> (%10508:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.7.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1), ), using_qnn:true] (%10508:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1)], %10506:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)]) -> (%10509:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.7.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1), ), using_qnn:true] (%10509:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10510:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.7.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), ), using_qnn:true] (%10498:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10511:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.7.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), ), using_qnn:true] (%10511:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)], %10510:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=357, solved=1)]) -> (%10512:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.7.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=358, solved=1), ), using_qnn:true] (%10512:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=354, solved=1)]) -> (%10513:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=358, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.7.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=358, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1), ), using_qnn:true] (%10513:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=358, solved=1)]) -> (%10514:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.7.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1), ), using_qnn:true] (%10514:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)]) -> (%10516:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.7.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=360, solved=1), ), using_qnn:true] (%10496:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=351, solved=1)]) -> (%10517:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=360, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.7.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=360, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1), ), using_qnn:true] (%10517:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=360, solved=1)]) -> (%10518:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.7.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1), ), using_qnn:true] (%9955:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1)], %10516:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)]) -> (%10520:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.7.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1), ), using_qnn:true] (%9956:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)], %10518:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1)]) -> (%10521:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.7.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1), ), using_qnn:true] (%10520:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1)]) -> (%10522:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.7.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1), ), using_qnn:true] (%10521:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)]) -> (%10523:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.7.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1), ), using_qnn:true] (%10505:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=352, solved=1)], %10522:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=10, solved=1)]) -> (%10524:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.7.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=363, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1), ), using_qnn:true] (%10524:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1)], %10525:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=363, solved=1)]) -> (%10526:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.7.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=1), ), using_qnn:true] (%10526:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1)]) -> (%10527:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.7.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=365, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=1), ), using_qnn:true] (%10527:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=1)], %10528:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=365, solved=1)]) -> (%10529:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.7.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=366, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=367, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10530:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=366, solved=1), constant:[0]]) -> (%10531:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=367, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.7.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=367, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368, solved=1), ), using_qnn:true] (%10531:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=367, solved=1)], %10526:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=362, solved=1)], %10529:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=364, solved=1)]) -> (%10532:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.7.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=1), ), using_qnn:true] (%10532:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=368, solved=1)]) -> (%10533:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.7.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1), ), using_qnn:true] (%10533:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=369, solved=1)], %10523:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=46, solved=1)]) -> (%10534:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.7.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1), ), using_qnn:true] (%10534:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1)]) -> (%10535:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1), ), using_qnn:true] (%10535:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1)]) -> (%10536:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.7.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=371, solved=1)), using_qnn:true] (%10536:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=370, solved=1)]) -> (%10537:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=1), ), using_qnn:true] (%10537:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=1)]) -> (%10538:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.7.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10485:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10538:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=372, solved=1)]) -> (%10539:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.7.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=374, solved=1)), using_qnn:true] (%10539:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10540:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1), ), using_qnn:true] (%10540:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1)]) -> (%10541:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.7.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=376, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=375, solved=1)), using_qnn:true] (%10541:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1)]) -> (%10542:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=376, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=376, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=376, solved=1), ), using_qnn:true] (%10542:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=376, solved=1)]) -> (%10543:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=376, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.7.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=377, solved=1)), using_qnn:true] (%10541:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=373, solved=1)]) -> (%10544:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), ), using_qnn:true] (%10544:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)]) -> (%10545:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.7.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=379, solved=1), ), using_qnn:true] (%10545:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)]) -> (%10546:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=379, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.7.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=379, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), ), using_qnn:true] (%10545:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)], %10546:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=379, solved=1)]) -> (%10547:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.7.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=376, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), ), using_qnn:true] (%10547:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)], %10543:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=376, solved=1)]) -> (%10548:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), ), using_qnn:true] (%10548:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)]) -> (%10549:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.7.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=380, solved=1)), using_qnn:true] (%10549:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=378, solved=1)]) -> (%10550:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.7.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=1), ), using_qnn:true] (%10550:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=1)]) -> (%10551:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.7.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10539:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10551:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=381, solved=1)]) -> (%10552:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.8.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=383, solved=1)), using_qnn:true] (%10552:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10553:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1), ), using_qnn:true] (%10553:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1)]) -> (%10554:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.8.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=384, solved=1)), using_qnn:true] (%10554:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1)]) -> (%10555:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.8.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=386, solved=1)), using_qnn:true] (%10554:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1)]) -> (%10556:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.8.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=388, solved=1)), using_qnn:true] (%10554:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=382, solved=1)]) -> (%10557:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1), ), using_qnn:true] (%10555:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1)]) -> (%10558:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.8.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1), ), using_qnn:true] (%10558:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1)]) -> (%10559:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1), ), using_qnn:true] (%10556:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1)]) -> (%10560:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.8.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1), ), using_qnn:true] (%10560:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1)]) -> (%10561:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1), ), using_qnn:true] (%10557:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1)]) -> (%10562:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.8.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1), ), using_qnn:true] (%10562:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1)]) -> (%10563:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.8.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=391, solved=1)), using_qnn:true] (%10559:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=385, solved=1)]) -> (%10564:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.8.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=393, solved=1)), using_qnn:true] (%10561:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=387, solved=1)]) -> (%10565:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.8.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), ), using_qnn:true] (%10564:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)]) -> (%10566:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.8.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), ), using_qnn:true] (%10564:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)]) -> (%10567:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.8.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1), ), using_qnn:true] (%10567:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)]) -> (%10568:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.8.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1), ), using_qnn:true] (%10568:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1)], %10566:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)]) -> (%10569:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.8.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1), ), using_qnn:true] (%10569:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10570:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.8.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), ), using_qnn:true] (%10564:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10571:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.8.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), ), using_qnn:true] (%10571:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)], %10570:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=394, solved=1)]) -> (%10572:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.8.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), ), using_qnn:true] (%10565:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)]) -> (%10573:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.8.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), ), using_qnn:true] (%10565:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)]) -> (%10574:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.8.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1), ), using_qnn:true] (%10574:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)]) -> (%10575:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.8.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1), ), using_qnn:true] (%10575:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1)], %10573:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)]) -> (%10576:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.8.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1), ), using_qnn:true] (%10576:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.8.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), ), using_qnn:true] (%10565:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10578:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.8.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), ), using_qnn:true] (%10578:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)], %10577:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=395, solved=1)]) -> (%10579:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.8.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=396, solved=1), ), using_qnn:true] (%10579:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=392, solved=1)]) -> (%10580:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=396, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.8.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=396, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1), ), using_qnn:true] (%10580:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=396, solved=1)]) -> (%10581:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.8.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1), ), using_qnn:true] (%10581:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)]) -> (%10583:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.8.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=398, solved=1), ), using_qnn:true] (%10563:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=389, solved=1)]) -> (%10584:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=398, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.8.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=398, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1), ), using_qnn:true] (%10584:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=398, solved=1)]) -> (%10585:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.8.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1), ), using_qnn:true] (%9957:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)], %10583:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)]) -> (%10587:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.8.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1), ), using_qnn:true] (%9958:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)], %10585:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1)]) -> (%10588:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.8.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1), ), using_qnn:true] (%10587:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)]) -> (%10589:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.8.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1), ), using_qnn:true] (%10588:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)]) -> (%10590:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.8.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1), ), using_qnn:true] (%10572:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=390, solved=1)], %10589:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=11, solved=1)]) -> (%10591:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.8.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=401, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1), ), using_qnn:true] (%10591:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1)], %10592:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=401, solved=1)]) -> (%10593:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.8.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=402, solved=1), ), using_qnn:true] (%10593:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1)]) -> (%10594:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=402, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.8.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=402, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=402, solved=1), ), using_qnn:true] (%10594:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=402, solved=1)], %10595:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=403, solved=1)]) -> (%10596:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=402, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.8.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=404, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=405, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10597:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=404, solved=1), constant:[0]]) -> (%10598:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=405, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.8.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=405, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=402, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406, solved=1), ), using_qnn:true] (%10598:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=405, solved=1)], %10593:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=400, solved=1)], %10596:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=402, solved=1)]) -> (%10599:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.8.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=1), ), using_qnn:true] (%10599:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=406, solved=1)]) -> (%10600:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.8.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1), ), using_qnn:true] (%10600:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=407, solved=1)], %10590:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=47, solved=1)]) -> (%10601:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.8.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1), ), using_qnn:true] (%10601:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1)]) -> (%10602:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1), ), using_qnn:true] (%10602:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1)]) -> (%10603:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.8.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=409, solved=1)), using_qnn:true] (%10603:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=408, solved=1)]) -> (%10604:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=1), ), using_qnn:true] (%10604:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=1)]) -> (%10605:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.8.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10552:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10605:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=410, solved=1)]) -> (%10606:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.8.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=412, solved=1)), using_qnn:true] (%10606:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10607:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1), ), using_qnn:true] (%10607:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1)]) -> (%10608:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.8.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=414, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=413, solved=1)), using_qnn:true] (%10608:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1)]) -> (%10609:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=414, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=414, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=414, solved=1), ), using_qnn:true] (%10609:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=414, solved=1)]) -> (%10610:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=414, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.8.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=415, solved=1)), using_qnn:true] (%10608:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=411, solved=1)]) -> (%10611:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), ), using_qnn:true] (%10611:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)]) -> (%10612:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.8.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=417, solved=1), ), using_qnn:true] (%10612:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)]) -> (%10613:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=417, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.8.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=417, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), ), using_qnn:true] (%10612:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)], %10613:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=417, solved=1)]) -> (%10614:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.8.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=414, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), ), using_qnn:true] (%10614:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)], %10610:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=414, solved=1)]) -> (%10615:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), ), using_qnn:true] (%10615:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)]) -> (%10616:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.8.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=418, solved=1)), using_qnn:true] (%10616:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=416, solved=1)]) -> (%10617:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.8.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=1), ), using_qnn:true] (%10617:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=1)]) -> (%10618:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.8.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10606:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10618:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=419, solved=1)]) -> (%10619:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.9.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=421, solved=1)), using_qnn:true] (%10619:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10620:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1), ), using_qnn:true] (%10620:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1)]) -> (%10621:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.9.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=422, solved=1)), using_qnn:true] (%10621:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1)]) -> (%10622:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.9.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=424, solved=1)), using_qnn:true] (%10621:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1)]) -> (%10623:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.9.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=426, solved=1)), using_qnn:true] (%10621:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=420, solved=1)]) -> (%10624:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1), ), using_qnn:true] (%10622:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1)]) -> (%10625:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.9.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1), ), using_qnn:true] (%10625:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1)]) -> (%10626:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1), ), using_qnn:true] (%10623:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1)]) -> (%10627:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.9.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1), ), using_qnn:true] (%10627:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1)]) -> (%10628:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1), ), using_qnn:true] (%10624:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1)]) -> (%10629:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.9.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1), ), using_qnn:true] (%10629:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1)]) -> (%10630:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.9.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=429, solved=1)), using_qnn:true] (%10626:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=423, solved=1)]) -> (%10631:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.9.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=431, solved=1)), using_qnn:true] (%10628:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=425, solved=1)]) -> (%10632:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.9.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), ), using_qnn:true] (%10631:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)]) -> (%10633:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.9.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), ), using_qnn:true] (%10631:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)]) -> (%10634:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.9.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1), ), using_qnn:true] (%10634:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)]) -> (%10635:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.9.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1), ), using_qnn:true] (%10635:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1)], %10633:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)]) -> (%10636:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.9.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1), ), using_qnn:true] (%10636:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10637:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.9.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), ), using_qnn:true] (%10631:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10638:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.9.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), ), using_qnn:true] (%10638:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)], %10637:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=432, solved=1)]) -> (%10639:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.9.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), ), using_qnn:true] (%10632:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)]) -> (%10640:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.9.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), ), using_qnn:true] (%10632:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)]) -> (%10641:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.9.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1), ), using_qnn:true] (%10641:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)]) -> (%10642:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.9.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1), ), using_qnn:true] (%10642:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1)], %10640:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)]) -> (%10643:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.9.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1), ), using_qnn:true] (%10643:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10644:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.9.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), ), using_qnn:true] (%10632:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10645:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.9.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), ), using_qnn:true] (%10645:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)], %10644:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=433, solved=1)]) -> (%10646:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.9.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=434, solved=1), ), using_qnn:true] (%10646:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=430, solved=1)]) -> (%10647:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=434, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.9.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=434, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1), ), using_qnn:true] (%10647:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=434, solved=1)]) -> (%10648:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.9.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1), ), using_qnn:true] (%10648:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)]) -> (%10650:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.9.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=436, solved=1), ), using_qnn:true] (%10630:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=427, solved=1)]) -> (%10651:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=436, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.9.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=436, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1), ), using_qnn:true] (%10651:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=436, solved=1)]) -> (%10652:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.9.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1), ), using_qnn:true] (%9959:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)], %10650:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)]) -> (%10654:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.9.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1), ), using_qnn:true] (%9960:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)], %10652:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1)]) -> (%10655:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.9.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1), ), using_qnn:true] (%10654:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)]) -> (%10656:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.9.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1), ), using_qnn:true] (%10655:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)]) -> (%10657:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.9.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1), ), using_qnn:true] (%10639:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=428, solved=1)], %10656:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=12, solved=1)]) -> (%10658:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.9.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1), ), using_qnn:true] (%10658:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1)], %10659:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=439, solved=1)]) -> (%10660:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.9.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=440, solved=1), ), using_qnn:true] (%10660:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1)]) -> (%10661:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=440, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.9.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=440, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=440, solved=1), ), using_qnn:true] (%10661:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=440, solved=1)], %10662:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=441, solved=1)]) -> (%10663:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=440, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.9.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=442, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=443, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10664:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=442, solved=1), constant:[0]]) -> (%10665:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=443, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.9.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=443, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=440, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=444, solved=1), ), using_qnn:true] (%10665:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=443, solved=1)], %10660:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=438, solved=1)], %10663:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=440, solved=1)]) -> (%10666:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=444, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.9.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=444, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445, solved=1), ), using_qnn:true] (%10666:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=444, solved=1)]) -> (%10667:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.9.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1), ), using_qnn:true] (%10667:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=445, solved=1)], %10657:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=48, solved=1)]) -> (%10668:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.9.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1), ), using_qnn:true] (%10668:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1)]) -> (%10669:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1), ), using_qnn:true] (%10669:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1)]) -> (%10670:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.9.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=447, solved=1)), using_qnn:true] (%10670:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=446, solved=1)]) -> (%10671:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=1), ), using_qnn:true] (%10671:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=1)]) -> (%10672:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.9.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10619:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10672:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=448, solved=1)]) -> (%10673:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.9.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=450, solved=1)), using_qnn:true] (%10673:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10674:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1), ), using_qnn:true] (%10674:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1)]) -> (%10675:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.9.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=451, solved=1)), using_qnn:true] (%10675:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1)]) -> (%10676:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=1), ), using_qnn:true] (%10676:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=1)]) -> (%10677:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.9.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=453, solved=1)), using_qnn:true] (%10675:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=449, solved=1)]) -> (%10678:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), ), using_qnn:true] (%10678:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)]) -> (%10679:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.9.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=455, solved=1), ), using_qnn:true] (%10679:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)]) -> (%10680:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=455, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.9.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=455, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), ), using_qnn:true] (%10679:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)], %10680:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=455, solved=1)]) -> (%10681:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.9.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), ), using_qnn:true] (%10681:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)], %10677:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=452, solved=1)]) -> (%10682:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), ), using_qnn:true] (%10682:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)]) -> (%10683:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.9.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=457, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=456, solved=1)), using_qnn:true] (%10683:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=454, solved=1)]) -> (%10684:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=457, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.9.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=457, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=457, solved=1), ), using_qnn:true] (%10684:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=457, solved=1)]) -> (%10685:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=457, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.9.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=457, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10673:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10685:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=457, solved=1)]) -> (%10686:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.10.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=459, solved=1)), using_qnn:true] (%10686:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10687:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1), ), using_qnn:true] (%10687:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1)]) -> (%10688:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.10.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=460, solved=1)), using_qnn:true] (%10688:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1)]) -> (%10689:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.10.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=462, solved=1)), using_qnn:true] (%10688:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1)]) -> (%10690:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.10.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=464, solved=1)), using_qnn:true] (%10688:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=458, solved=1)]) -> (%10691:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1), ), using_qnn:true] (%10689:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1)]) -> (%10692:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.10.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1), ), using_qnn:true] (%10692:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1)]) -> (%10693:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1), ), using_qnn:true] (%10690:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1)]) -> (%10694:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.10.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1), ), using_qnn:true] (%10694:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1)]) -> (%10695:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1), ), using_qnn:true] (%10691:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1)]) -> (%10696:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.10.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1), ), using_qnn:true] (%10696:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1)]) -> (%10697:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.10.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=467, solved=1)), using_qnn:true] (%10693:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=461, solved=1)]) -> (%10698:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.10.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=469, solved=1)), using_qnn:true] (%10695:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=463, solved=1)]) -> (%10699:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.10.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), ), using_qnn:true] (%10698:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)]) -> (%10700:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.10.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), ), using_qnn:true] (%10698:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)]) -> (%10701:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.10.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1), ), using_qnn:true] (%10701:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)]) -> (%10702:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.10.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1), ), using_qnn:true] (%10702:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1)], %10700:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)]) -> (%10703:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.10.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1), ), using_qnn:true] (%10703:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10704:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.10.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), ), using_qnn:true] (%10698:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10705:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.10.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), ), using_qnn:true] (%10705:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)], %10704:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=470, solved=1)]) -> (%10706:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.10.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), ), using_qnn:true] (%10699:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)]) -> (%10707:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.10.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), ), using_qnn:true] (%10699:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)]) -> (%10708:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.10.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1), ), using_qnn:true] (%10708:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)]) -> (%10709:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.10.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1), ), using_qnn:true] (%10709:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1)], %10707:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)]) -> (%10710:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.10.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1), ), using_qnn:true] (%10710:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10711:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.10.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), ), using_qnn:true] (%10699:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10712:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.10.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), ), using_qnn:true] (%10712:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)], %10711:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=471, solved=1)]) -> (%10713:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.10.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=472, solved=1), ), using_qnn:true] (%10713:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=468, solved=1)]) -> (%10714:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=472, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.10.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=472, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1), ), using_qnn:true] (%10714:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=472, solved=1)]) -> (%10715:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.10.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1), ), using_qnn:true] (%10715:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)]) -> (%10717:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.10.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=474, solved=1), ), using_qnn:true] (%10697:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=465, solved=1)]) -> (%10718:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=474, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.10.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=474, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1), ), using_qnn:true] (%10718:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=474, solved=1)]) -> (%10719:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.10.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1), ), using_qnn:true] (%9961:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)], %10717:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)]) -> (%10721:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.10.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1), ), using_qnn:true] (%9962:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)], %10719:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1)]) -> (%10722:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.10.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1), ), using_qnn:true] (%10721:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)]) -> (%10723:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.10.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1), ), using_qnn:true] (%10722:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)]) -> (%10724:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.10.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1), ), using_qnn:true] (%10706:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=466, solved=1)], %10723:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=13, solved=1)]) -> (%10725:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.10.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1), ), using_qnn:true] (%10725:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1)], %10726:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=477, solved=1)]) -> (%10727:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.10.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=1), ), using_qnn:true] (%10727:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1)]) -> (%10728:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.10.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=479, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=1), ), using_qnn:true] (%10728:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=1)], %10729:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=479, solved=1)]) -> (%10730:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.10.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=480, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=481, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10731:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=480, solved=1), constant:[0]]) -> (%10732:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=481, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.10.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=481, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=1), ), using_qnn:true] (%10732:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=481, solved=1)], %10727:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=476, solved=1)], %10730:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=478, solved=1)]) -> (%10733:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.10.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483, solved=1), ), using_qnn:true] (%10733:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=482, solved=1)]) -> (%10734:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.10.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1), ), using_qnn:true] (%10734:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=483, solved=1)], %10724:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=49, solved=1)]) -> (%10735:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.10.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1), ), using_qnn:true] (%10735:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1)]) -> (%10736:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1), ), using_qnn:true] (%10736:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1)]) -> (%10737:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.10.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=485, solved=1)), using_qnn:true] (%10737:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=484, solved=1)]) -> (%10738:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=1), ), using_qnn:true] (%10738:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=1)]) -> (%10739:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.10.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10686:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10739:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=486, solved=1)]) -> (%10740:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.10.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=488, solved=1)), using_qnn:true] (%10740:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10741:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1), ), using_qnn:true] (%10741:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1)]) -> (%10742:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.10.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=490, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=489, solved=1)), using_qnn:true] (%10742:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1)]) -> (%10743:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=490, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=490, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=490, solved=1), ), using_qnn:true] (%10743:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=490, solved=1)]) -> (%10744:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=490, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.10.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=491, solved=1)), using_qnn:true] (%10742:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=487, solved=1)]) -> (%10745:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), ), using_qnn:true] (%10745:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)]) -> (%10746:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.10.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=1), ), using_qnn:true] (%10746:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)]) -> (%10747:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.10.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), ), using_qnn:true] (%10746:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)], %10747:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=493, solved=1)]) -> (%10748:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.10.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=490, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), ), using_qnn:true] (%10748:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)], %10744:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=490, solved=1)]) -> (%10749:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), ), using_qnn:true] (%10749:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)]) -> (%10750:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.10.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=494, solved=1)), using_qnn:true] (%10750:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=492, solved=1)]) -> (%10751:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.10.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=1), ), using_qnn:true] (%10751:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=1)]) -> (%10752:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.10.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10740:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10752:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=495, solved=1)]) -> (%10753:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.11.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=497, solved=1)), using_qnn:true] (%10753:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10754:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1), ), using_qnn:true] (%10754:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1)]) -> (%10755:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.11.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=498, solved=1)), using_qnn:true] (%10755:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1)]) -> (%10756:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.11.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=500, solved=1)), using_qnn:true] (%10755:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1)]) -> (%10757:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.11.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=502, solved=1)), using_qnn:true] (%10755:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=496, solved=1)]) -> (%10758:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1), ), using_qnn:true] (%10756:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1)]) -> (%10759:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.11.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1), ), using_qnn:true] (%10759:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1)]) -> (%10760:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1), ), using_qnn:true] (%10757:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1)]) -> (%10761:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.11.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1), ), using_qnn:true] (%10761:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1)]) -> (%10762:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1), ), using_qnn:true] (%10758:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1)]) -> (%10763:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.11.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1), ), using_qnn:true] (%10763:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1)]) -> (%10764:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.11.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=505, solved=1)), using_qnn:true] (%10760:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=499, solved=1)]) -> (%10765:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.11.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=507, solved=1)), using_qnn:true] (%10762:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=501, solved=1)]) -> (%10766:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.11.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), ), using_qnn:true] (%10765:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)]) -> (%10767:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.11.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), ), using_qnn:true] (%10765:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)]) -> (%10768:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.11.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1), ), using_qnn:true] (%10768:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)]) -> (%10769:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.11.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1), ), using_qnn:true] (%10769:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1)], %10767:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)]) -> (%10770:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.11.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1), ), using_qnn:true] (%10770:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10771:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.11.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), ), using_qnn:true] (%10765:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10772:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.11.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), ), using_qnn:true] (%10772:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)], %10771:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=508, solved=1)]) -> (%10773:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.11.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), ), using_qnn:true] (%10766:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)]) -> (%10774:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.11.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), ), using_qnn:true] (%10766:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)]) -> (%10775:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.11.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1), ), using_qnn:true] (%10775:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)]) -> (%10776:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.11.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1), ), using_qnn:true] (%10776:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1)], %10774:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)]) -> (%10777:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.11.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1), ), using_qnn:true] (%10777:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10778:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.11.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), ), using_qnn:true] (%10766:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10779:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.11.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), ), using_qnn:true] (%10779:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)], %10778:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=509, solved=1)]) -> (%10780:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.11.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=510, solved=1), ), using_qnn:true] (%10780:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=506, solved=1)]) -> (%10781:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=510, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.11.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=510, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1), ), using_qnn:true] (%10781:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=510, solved=1)]) -> (%10782:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.11.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1), ), using_qnn:true] (%10782:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1)]) -> (%10784:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.11.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=512, solved=1), ), using_qnn:true] (%10764:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=503, solved=1)]) -> (%10785:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=512, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.11.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=512, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1), ), using_qnn:true] (%10785:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=512, solved=1)]) -> (%10786:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.11.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1), ), using_qnn:true] (%9963:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)], %10784:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1)]) -> (%10788:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.11.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1), ), using_qnn:true] (%9964:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)], %10786:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1)]) -> (%10789:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.11.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1), ), using_qnn:true] (%10788:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)]) -> (%10790:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.11.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1), ), using_qnn:true] (%10789:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)]) -> (%10791:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.11.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1), ), using_qnn:true] (%10773:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=504, solved=1)], %10790:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=14, solved=1)]) -> (%10792:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.11.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1), ), using_qnn:true] (%10792:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1)], %10793:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=515, solved=1)]) -> (%10794:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.11.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=516, solved=1), ), using_qnn:true] (%10794:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1)]) -> (%10795:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=516, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.11.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=516, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=516, solved=1), ), using_qnn:true] (%10795:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=516, solved=1)], %10796:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=517, solved=1)]) -> (%10797:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=516, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.11.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=518, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=519, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10798:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=518, solved=1), constant:[0]]) -> (%10799:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=519, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.11.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=519, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=516, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520, solved=1), ), using_qnn:true] (%10799:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=519, solved=1)], %10794:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=514, solved=1)], %10797:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=516, solved=1)]) -> (%10800:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.11.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=1), ), using_qnn:true] (%10800:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=520, solved=1)]) -> (%10801:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.11.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1), ), using_qnn:true] (%10801:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=521, solved=1)], %10791:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=50, solved=1)]) -> (%10802:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.11.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1), ), using_qnn:true] (%10802:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1)]) -> (%10803:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1), ), using_qnn:true] (%10803:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1)]) -> (%10804:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.11.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=523, solved=1)), using_qnn:true] (%10804:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=522, solved=1)]) -> (%10805:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=1), ), using_qnn:true] (%10805:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=1)]) -> (%10806:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.11.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10753:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10806:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=524, solved=1)]) -> (%10807:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.11.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=526, solved=1)), using_qnn:true] (%10807:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10808:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1), ), using_qnn:true] (%10808:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1)]) -> (%10809:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.11.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=527, solved=1)), using_qnn:true] (%10809:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1)]) -> (%10810:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=1), ), using_qnn:true] (%10810:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=1)]) -> (%10811:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.11.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=529, solved=1)), using_qnn:true] (%10809:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=525, solved=1)]) -> (%10812:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), ), using_qnn:true] (%10812:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)]) -> (%10813:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.11.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=1), ), using_qnn:true] (%10813:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)]) -> (%10814:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.11.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), ), using_qnn:true] (%10813:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)], %10814:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=531, solved=1)]) -> (%10815:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.11.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), ), using_qnn:true] (%10815:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)], %10811:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=528, solved=1)]) -> (%10816:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), ), using_qnn:true] (%10816:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)]) -> (%10817:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.11.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=533, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=532, solved=1)), using_qnn:true] (%10817:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=530, solved=1)]) -> (%10818:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=533, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.11.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=533, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=533, solved=1), ), using_qnn:true] (%10818:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=533, solved=1)]) -> (%10819:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=533, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.11.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=533, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10807:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10819:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=533, solved=1)]) -> (%10820:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.12.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=535, solved=1)), using_qnn:true] (%10820:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10821:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1), ), using_qnn:true] (%10821:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1)]) -> (%10822:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.12.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=536, solved=1)), using_qnn:true] (%10822:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1)]) -> (%10823:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.12.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=538, solved=1)), using_qnn:true] (%10822:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1)]) -> (%10824:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.12.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=540, solved=1)), using_qnn:true] (%10822:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=534, solved=1)]) -> (%10825:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1), ), using_qnn:true] (%10823:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1)]) -> (%10826:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.12.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1), ), using_qnn:true] (%10826:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1)]) -> (%10827:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1), ), using_qnn:true] (%10824:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1)]) -> (%10828:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.12.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1), ), using_qnn:true] (%10828:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1)]) -> (%10829:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1), ), using_qnn:true] (%10825:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1)]) -> (%10830:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.12.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1), ), using_qnn:true] (%10830:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1)]) -> (%10831:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.12.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=543, solved=1)), using_qnn:true] (%10827:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=537, solved=1)]) -> (%10832:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.12.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=545, solved=1)), using_qnn:true] (%10829:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=539, solved=1)]) -> (%10833:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.12.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), ), using_qnn:true] (%10832:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)]) -> (%10834:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.12.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), ), using_qnn:true] (%10832:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)]) -> (%10835:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.12.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1), ), using_qnn:true] (%10835:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)]) -> (%10836:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.12.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1), ), using_qnn:true] (%10836:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1)], %10834:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)]) -> (%10837:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.12.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1), ), using_qnn:true] (%10837:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10838:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.12.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), ), using_qnn:true] (%10832:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10839:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.12.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), ), using_qnn:true] (%10839:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)], %10838:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=546, solved=1)]) -> (%10840:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.12.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), ), using_qnn:true] (%10833:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)]) -> (%10841:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.12.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), ), using_qnn:true] (%10833:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)]) -> (%10842:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.12.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1), ), using_qnn:true] (%10842:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)]) -> (%10843:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.12.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1), ), using_qnn:true] (%10843:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1)], %10841:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)]) -> (%10844:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.12.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1), ), using_qnn:true] (%10844:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10845:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.12.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), ), using_qnn:true] (%10833:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10846:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.12.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), ), using_qnn:true] (%10846:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)], %10845:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=547, solved=1)]) -> (%10847:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.12.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=548, solved=1), ), using_qnn:true] (%10847:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=544, solved=1)]) -> (%10848:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=548, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.12.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=548, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1), ), using_qnn:true] (%10848:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=548, solved=1)]) -> (%10849:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.12.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1), ), using_qnn:true] (%10849:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)]) -> (%10851:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.12.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=550, solved=1), ), using_qnn:true] (%10831:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=541, solved=1)]) -> (%10852:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=550, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.12.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=550, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1), ), using_qnn:true] (%10852:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=550, solved=1)]) -> (%10853:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.12.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1), ), using_qnn:true] (%9965:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)], %10851:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)]) -> (%10855:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.12.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1), ), using_qnn:true] (%9966:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)], %10853:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1)]) -> (%10856:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.12.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1), ), using_qnn:true] (%10855:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)]) -> (%10857:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.12.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1), ), using_qnn:true] (%10856:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)]) -> (%10858:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.12.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1), ), using_qnn:true] (%10840:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=542, solved=1)], %10857:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=15, solved=1)]) -> (%10859:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.12.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1), ), using_qnn:true] (%10859:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1)], %10860:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=553, solved=1)]) -> (%10861:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.12.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=554, solved=1), ), using_qnn:true] (%10861:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1)]) -> (%10862:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=554, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.12.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=554, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=554, solved=1), ), using_qnn:true] (%10862:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=554, solved=1)], %10863:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=555, solved=1)]) -> (%10864:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=554, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.12.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=556, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=557, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10865:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=556, solved=1), constant:[0]]) -> (%10866:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=557, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.12.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=557, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=554, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=558, solved=1), ), using_qnn:true] (%10866:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=557, solved=1)], %10861:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=552, solved=1)], %10864:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=554, solved=1)]) -> (%10867:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=558, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.12.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=558, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=1), ), using_qnn:true] (%10867:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=558, solved=1)]) -> (%10868:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.12.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1), ), using_qnn:true] (%10868:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=559, solved=1)], %10858:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=51, solved=1)]) -> (%10869:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.12.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1), ), using_qnn:true] (%10869:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1)]) -> (%10870:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1), ), using_qnn:true] (%10870:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1)]) -> (%10871:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.12.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=562, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=561, solved=1)), using_qnn:true] (%10871:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=560, solved=1)]) -> (%10872:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=562, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=562, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=562, solved=1), ), using_qnn:true] (%10872:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=562, solved=1)]) -> (%10873:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=562, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.12.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=562, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10820:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10873:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=562, solved=1)]) -> (%10874:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.12.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=564, solved=1)), using_qnn:true] (%10874:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10875:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1), ), using_qnn:true] (%10875:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1)]) -> (%10876:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.12.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=565, solved=1)), using_qnn:true] (%10876:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1)]) -> (%10877:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=1), ), using_qnn:true] (%10877:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=1)]) -> (%10878:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.12.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=567, solved=1)), using_qnn:true] (%10876:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=563, solved=1)]) -> (%10879:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), ), using_qnn:true] (%10879:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)]) -> (%10880:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.12.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=569, solved=1), ), using_qnn:true] (%10880:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)]) -> (%10881:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=569, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.12.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=569, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), ), using_qnn:true] (%10880:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)], %10881:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=569, solved=1)]) -> (%10882:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.12.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), ), using_qnn:true] (%10882:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)], %10878:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=566, solved=1)]) -> (%10883:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), ), using_qnn:true] (%10883:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)]) -> (%10884:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.12.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=571, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=570, solved=1)), using_qnn:true] (%10884:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=568, solved=1)]) -> (%10885:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=571, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.12.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=571, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=571, solved=1), ), using_qnn:true] (%10885:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=571, solved=1)]) -> (%10886:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=571, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.12.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=571, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10874:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10886:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=571, solved=1)]) -> (%10887:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.13.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=573, solved=1)), using_qnn:true] (%10887:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10888:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1), ), using_qnn:true] (%10888:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1)]) -> (%10889:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.13.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=574, solved=1)), using_qnn:true] (%10889:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1)]) -> (%10890:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.13.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=576, solved=1)), using_qnn:true] (%10889:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1)]) -> (%10891:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.13.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=578, solved=1)), using_qnn:true] (%10889:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=572, solved=1)]) -> (%10892:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1), ), using_qnn:true] (%10890:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1)]) -> (%10893:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.13.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1), ), using_qnn:true] (%10893:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1)]) -> (%10894:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1), ), using_qnn:true] (%10891:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1)]) -> (%10895:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.13.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1), ), using_qnn:true] (%10895:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1)]) -> (%10896:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1), ), using_qnn:true] (%10892:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1)]) -> (%10897:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.13.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1), ), using_qnn:true] (%10897:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1)]) -> (%10898:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.13.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=581, solved=1)), using_qnn:true] (%10894:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=575, solved=1)]) -> (%10899:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.13.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=583, solved=1)), using_qnn:true] (%10896:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=577, solved=1)]) -> (%10900:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.13.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), ), using_qnn:true] (%10899:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)]) -> (%10901:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.13.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), ), using_qnn:true] (%10899:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)]) -> (%10902:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.13.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1), ), using_qnn:true] (%10902:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)]) -> (%10903:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.13.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1), ), using_qnn:true] (%10903:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1)], %10901:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)]) -> (%10904:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.13.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1), ), using_qnn:true] (%10904:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10905:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.13.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), ), using_qnn:true] (%10899:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10906:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.13.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), ), using_qnn:true] (%10906:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)], %10905:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=584, solved=1)]) -> (%10907:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.13.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), ), using_qnn:true] (%10900:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)]) -> (%10908:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.13.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), ), using_qnn:true] (%10900:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)]) -> (%10909:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.13.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1), ), using_qnn:true] (%10909:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)]) -> (%10910:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.13.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1), ), using_qnn:true] (%10910:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1)], %10908:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)]) -> (%10911:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.13.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1), ), using_qnn:true] (%10911:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10912:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.13.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), ), using_qnn:true] (%10900:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10913:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.13.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), ), using_qnn:true] (%10913:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)], %10912:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=585, solved=1)]) -> (%10914:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.13.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=586, solved=1), ), using_qnn:true] (%10914:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=582, solved=1)]) -> (%10915:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=586, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.13.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=586, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1), ), using_qnn:true] (%10915:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=586, solved=1)]) -> (%10916:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.13.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1), ), using_qnn:true] (%10916:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)]) -> (%10918:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.13.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=588, solved=1), ), using_qnn:true] (%10898:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=579, solved=1)]) -> (%10919:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=588, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.13.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=588, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, solved=1), ), using_qnn:true] (%10919:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=588, solved=1)]) -> (%10920:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.13.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1), ), using_qnn:true] (%9967:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)], %10918:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)]) -> (%10922:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.13.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1), ), using_qnn:true] (%9968:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)], %10920:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, solved=1)]) -> (%10923:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.13.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1), ), using_qnn:true] (%10922:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)]) -> (%10924:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.13.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1), ), using_qnn:true] (%10923:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)]) -> (%10925:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.13.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1), ), using_qnn:true] (%10907:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=580, solved=1)], %10924:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=16, solved=1)]) -> (%10926:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.13.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1), ), using_qnn:true] (%10926:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1)], %10927:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=591, solved=1)]) -> (%10928:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.13.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=1), ), using_qnn:true] (%10928:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1)]) -> (%10929:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.13.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=593, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=1), ), using_qnn:true] (%10929:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=1)], %10930:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=593, solved=1)]) -> (%10931:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.13.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=594, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=595, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10932:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=594, solved=1), constant:[0]]) -> (%10933:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=595, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.13.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=595, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=1), ), using_qnn:true] (%10933:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=595, solved=1)], %10928:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=590, solved=1)], %10931:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=592, solved=1)]) -> (%10934:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.13.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=597, solved=1), ), using_qnn:true] (%10934:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=596, solved=1)]) -> (%10935:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=597, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.13.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=597, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1), ), using_qnn:true] (%10935:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=597, solved=1)], %10925:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=52, solved=1)]) -> (%10936:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.13.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1), ), using_qnn:true] (%10936:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1)]) -> (%10937:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1), ), using_qnn:true] (%10937:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1)]) -> (%10938:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.13.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=599, solved=1)), using_qnn:true] (%10938:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=598, solved=1)]) -> (%10939:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=1), ), using_qnn:true] (%10939:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=1)]) -> (%10940:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.13.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10887:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10940:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=600, solved=1)]) -> (%10941:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.13.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=602, solved=1)), using_qnn:true] (%10941:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10942:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1), ), using_qnn:true] (%10942:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1)]) -> (%10943:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.13.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=604, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=603, solved=1)), using_qnn:true] (%10943:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1)]) -> (%10944:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=604, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=604, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=604, solved=1), ), using_qnn:true] (%10944:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=604, solved=1)]) -> (%10945:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=604, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.13.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=605, solved=1)), using_qnn:true] (%10943:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=601, solved=1)]) -> (%10946:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), ), using_qnn:true] (%10946:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)]) -> (%10947:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.13.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=1), ), using_qnn:true] (%10947:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)]) -> (%10948:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.13.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), ), using_qnn:true] (%10947:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)], %10948:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=607, solved=1)]) -> (%10949:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.13.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=604, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), ), using_qnn:true] (%10949:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)], %10945:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=604, solved=1)]) -> (%10950:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), ), using_qnn:true] (%10950:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)]) -> (%10951:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.13.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=608, solved=1)), using_qnn:true] (%10951:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=606, solved=1)]) -> (%10952:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.13.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=1), ), using_qnn:true] (%10952:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=1)]) -> (%10953:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.13.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10941:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %10953:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=609, solved=1)]) -> (%10954:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.14.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=611, solved=1)), using_qnn:true] (%10954:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%10955:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1), ), using_qnn:true] (%10955:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1)]) -> (%10956:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.14.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=612, solved=1)), using_qnn:true] (%10956:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1)]) -> (%10957:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.14.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=614, solved=1)), using_qnn:true] (%10956:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1)]) -> (%10958:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.14.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=616, solved=1)), using_qnn:true] (%10956:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=610, solved=1)]) -> (%10959:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1), ), using_qnn:true] (%10957:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1)]) -> (%10960:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.14.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1), ), using_qnn:true] (%10960:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1)]) -> (%10961:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1), ), using_qnn:true] (%10958:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1)]) -> (%10962:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.14.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1), ), using_qnn:true] (%10962:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1)]) -> (%10963:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1), ), using_qnn:true] (%10959:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1)]) -> (%10964:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.14.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1), ), using_qnn:true] (%10964:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1)]) -> (%10965:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.14.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=619, solved=1)), using_qnn:true] (%10961:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=613, solved=1)]) -> (%10966:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.14.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=621, solved=1)), using_qnn:true] (%10963:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=615, solved=1)]) -> (%10967:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.14.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), ), using_qnn:true] (%10966:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)]) -> (%10968:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.14.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), ), using_qnn:true] (%10966:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)]) -> (%10969:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.14.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1), ), using_qnn:true] (%10969:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)]) -> (%10970:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.14.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1), ), using_qnn:true] (%10970:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1)], %10968:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)]) -> (%10971:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.14.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1), ), using_qnn:true] (%10971:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10972:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.14.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), ), using_qnn:true] (%10966:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10973:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.14.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), ), using_qnn:true] (%10973:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)], %10972:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=622, solved=1)]) -> (%10974:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.14.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), ), using_qnn:true] (%10967:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)]) -> (%10975:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.14.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), ), using_qnn:true] (%10967:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)]) -> (%10976:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.14.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1), ), using_qnn:true] (%10976:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)]) -> (%10977:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.14.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1), ), using_qnn:true] (%10977:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1)], %10975:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)]) -> (%10978:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.14.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1), ), using_qnn:true] (%10978:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10979:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.14.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), ), using_qnn:true] (%10967:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10980:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.14.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), ), using_qnn:true] (%10980:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)], %10979:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=623, solved=1)]) -> (%10981:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.14.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=624, solved=1), ), using_qnn:true] (%10981:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=620, solved=1)]) -> (%10982:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=624, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.14.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=624, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1), ), using_qnn:true] (%10982:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=624, solved=1)]) -> (%10983:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.14.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1), ), using_qnn:true] (%10983:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)]) -> (%10985:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.14.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=626, solved=1), ), using_qnn:true] (%10965:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=617, solved=1)]) -> (%10986:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=626, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.14.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=626, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1), ), using_qnn:true] (%10986:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=626, solved=1)]) -> (%10987:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.14.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1), ), using_qnn:true] (%9969:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)], %10985:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)]) -> (%10989:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.14.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1), ), using_qnn:true] (%9970:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)], %10987:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1)]) -> (%10990:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.14.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1), ), using_qnn:true] (%10989:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)]) -> (%10991:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.14.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1), ), using_qnn:true] (%10990:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)]) -> (%10992:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.14.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1), ), using_qnn:true] (%10974:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=618, solved=1)], %10991:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=17, solved=1)]) -> (%10993:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.14.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1), ), using_qnn:true] (%10993:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1)], %10994:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=629, solved=1)]) -> (%10995:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.14.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=630, solved=1), ), using_qnn:true] (%10995:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1)]) -> (%10996:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=630, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.14.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=630, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=630, solved=1), ), using_qnn:true] (%10996:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=630, solved=1)], %10997:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=631, solved=1)]) -> (%10998:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=630, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.14.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=632, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=633, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %10999:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=632, solved=1), constant:[0]]) -> (%11000:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=633, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.14.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=633, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=630, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=1), ), using_qnn:true] (%11000:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=633, solved=1)], %10995:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=628, solved=1)], %10998:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=630, solved=1)]) -> (%11001:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.14.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=635, solved=1), ), using_qnn:true] (%11001:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=634, solved=1)]) -> (%11002:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=635, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.14.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=635, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1), ), using_qnn:true] (%11002:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=635, solved=1)], %10992:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=53, solved=1)]) -> (%11003:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.14.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1), ), using_qnn:true] (%11003:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1)]) -> (%11004:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1), ), using_qnn:true] (%11004:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1)]) -> (%11005:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.14.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=638, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=637, solved=1)), using_qnn:true] (%11005:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=636, solved=1)]) -> (%11006:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=638, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=638, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=638, solved=1), ), using_qnn:true] (%11006:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=638, solved=1)]) -> (%11007:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=638, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.14.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=638, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%10954:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11007:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=638, solved=1)]) -> (%11008:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.14.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=640, solved=1)), using_qnn:true] (%11008:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11009:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1), ), using_qnn:true] (%11009:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1)]) -> (%11010:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.14.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=641, solved=1)), using_qnn:true] (%11010:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1)]) -> (%11011:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=1), ), using_qnn:true] (%11011:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=1)]) -> (%11012:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.14.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=643, solved=1)), using_qnn:true] (%11010:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=639, solved=1)]) -> (%11013:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), ), using_qnn:true] (%11013:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)]) -> (%11014:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.14.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=645, solved=1), ), using_qnn:true] (%11014:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)]) -> (%11015:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=645, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.14.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=645, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), ), using_qnn:true] (%11014:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)], %11015:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=645, solved=1)]) -> (%11016:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.14.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), ), using_qnn:true] (%11016:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)], %11012:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=642, solved=1)]) -> (%11017:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), ), using_qnn:true] (%11017:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)]) -> (%11018:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.14.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=647, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=646, solved=1)), using_qnn:true] (%11018:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=644, solved=1)]) -> (%11019:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=647, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.14.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=647, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=647, solved=1), ), using_qnn:true] (%11019:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=647, solved=1)]) -> (%11020:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=647, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.14.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=647, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11008:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11020:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=647, solved=1)]) -> (%11021:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.15.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=649, solved=1)), using_qnn:true] (%11021:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11022:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1), ), using_qnn:true] (%11022:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1)]) -> (%11023:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.15.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=650, solved=1)), using_qnn:true] (%11023:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1)]) -> (%11024:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.15.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=652, solved=1)), using_qnn:true] (%11023:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1)]) -> (%11025:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.15.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=654, solved=1)), using_qnn:true] (%11023:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=648, solved=1)]) -> (%11026:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1), ), using_qnn:true] (%11024:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1)]) -> (%11027:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.15.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1), ), using_qnn:true] (%11027:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1)]) -> (%11028:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1), ), using_qnn:true] (%11025:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1)]) -> (%11029:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.15.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1), ), using_qnn:true] (%11029:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1)]) -> (%11030:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1), ), using_qnn:true] (%11026:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1)]) -> (%11031:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.15.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1), ), using_qnn:true] (%11031:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1)]) -> (%11032:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.15.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=657, solved=1)), using_qnn:true] (%11028:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=651, solved=1)]) -> (%11033:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.15.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=659, solved=1)), using_qnn:true] (%11030:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=653, solved=1)]) -> (%11034:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.15.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), ), using_qnn:true] (%11033:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)]) -> (%11035:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.15.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), ), using_qnn:true] (%11033:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)]) -> (%11036:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.15.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1), ), using_qnn:true] (%11036:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)]) -> (%11037:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.15.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1), ), using_qnn:true] (%11037:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1)], %11035:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)]) -> (%11038:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.15.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1), ), using_qnn:true] (%11038:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11039:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.15.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), ), using_qnn:true] (%11033:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11040:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.15.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), ), using_qnn:true] (%11040:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)], %11039:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=660, solved=1)]) -> (%11041:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.15.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), ), using_qnn:true] (%11034:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)]) -> (%11042:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.15.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), ), using_qnn:true] (%11034:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)]) -> (%11043:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.15.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1), ), using_qnn:true] (%11043:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)]) -> (%11044:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.15.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1), ), using_qnn:true] (%11044:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1)], %11042:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)]) -> (%11045:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.15.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1), ), using_qnn:true] (%11045:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11046:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.15.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), ), using_qnn:true] (%11034:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11047:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.15.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), ), using_qnn:true] (%11047:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)], %11046:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=661, solved=1)]) -> (%11048:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.15.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=662, solved=1), ), using_qnn:true] (%11048:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=658, solved=1)]) -> (%11049:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=662, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.15.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=662, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1), ), using_qnn:true] (%11049:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=662, solved=1)]) -> (%11050:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.15.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1), ), using_qnn:true] (%11050:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)]) -> (%11052:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.15.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=664, solved=1), ), using_qnn:true] (%11032:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=655, solved=1)]) -> (%11053:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=664, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.15.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=664, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1), ), using_qnn:true] (%11053:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=664, solved=1)]) -> (%11054:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.15.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1), ), using_qnn:true] (%9971:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)], %11052:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)]) -> (%11056:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.15.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1), ), using_qnn:true] (%9972:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)], %11054:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1)]) -> (%11057:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.15.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1), ), using_qnn:true] (%11056:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)]) -> (%11058:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.15.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1), ), using_qnn:true] (%11057:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)]) -> (%11059:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.15.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1), ), using_qnn:true] (%11041:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=656, solved=1)], %11058:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=18, solved=1)]) -> (%11060:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.15.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=667, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1), ), using_qnn:true] (%11060:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1)], %11061:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=667, solved=1)]) -> (%11062:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.15.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=1), ), using_qnn:true] (%11062:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1)]) -> (%11063:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.15.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=1), ), using_qnn:true] (%11063:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=1)], %11064:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=669, solved=1)]) -> (%11065:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.15.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=670, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=671, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11066:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=670, solved=1), constant:[0]]) -> (%11067:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=671, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.15.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=671, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=672, solved=1), ), using_qnn:true] (%11067:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=671, solved=1)], %11062:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=666, solved=1)], %11065:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=668, solved=1)]) -> (%11068:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=672, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.15.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=672, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=673, solved=1), ), using_qnn:true] (%11068:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=672, solved=1)]) -> (%11069:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=673, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.15.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=673, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1), ), using_qnn:true] (%11069:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=673, solved=1)], %11059:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=54, solved=1)]) -> (%11070:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.15.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1), ), using_qnn:true] (%11070:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1)]) -> (%11071:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1), ), using_qnn:true] (%11071:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1)]) -> (%11072:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.15.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=676, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=675, solved=1)), using_qnn:true] (%11072:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=674, solved=1)]) -> (%11073:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=676, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=676, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=676, solved=1), ), using_qnn:true] (%11073:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=676, solved=1)]) -> (%11074:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=676, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.15.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=676, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11021:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11074:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=676, solved=1)]) -> (%11075:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.15.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=678, solved=1)), using_qnn:true] (%11075:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11076:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1), ), using_qnn:true] (%11076:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1)]) -> (%11077:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.15.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=679, solved=1)), using_qnn:true] (%11077:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1)]) -> (%11078:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=1), ), using_qnn:true] (%11078:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=1)]) -> (%11079:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.15.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=681, solved=1)), using_qnn:true] (%11077:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=677, solved=1)]) -> (%11080:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), ), using_qnn:true] (%11080:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)]) -> (%11081:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.15.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683, solved=1), ), using_qnn:true] (%11081:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)]) -> (%11082:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.15.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), ), using_qnn:true] (%11081:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)], %11082:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=683, solved=1)]) -> (%11083:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.15.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), ), using_qnn:true] (%11083:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)], %11079:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=680, solved=1)]) -> (%11084:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), ), using_qnn:true] (%11084:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)]) -> (%11085:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.15.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=685, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=684, solved=1)), using_qnn:true] (%11085:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=682, solved=1)]) -> (%11086:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=685, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.15.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=685, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=685, solved=1), ), using_qnn:true] (%11086:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=685, solved=1)]) -> (%11087:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=685, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.15.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=685, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11075:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11087:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=685, solved=1)]) -> (%11088:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.16.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=687, solved=1)), using_qnn:true] (%11088:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11089:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1), ), using_qnn:true] (%11089:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1)]) -> (%11090:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.16.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=688, solved=1)), using_qnn:true] (%11090:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1)]) -> (%11091:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.16.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=690, solved=1)), using_qnn:true] (%11090:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1)]) -> (%11092:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.16.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=692, solved=1)), using_qnn:true] (%11090:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=686, solved=1)]) -> (%11093:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1), ), using_qnn:true] (%11091:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1)]) -> (%11094:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.16.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1), ), using_qnn:true] (%11094:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1)]) -> (%11095:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1), ), using_qnn:true] (%11092:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1)]) -> (%11096:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.16.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1), ), using_qnn:true] (%11096:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1)]) -> (%11097:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1), ), using_qnn:true] (%11093:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1)]) -> (%11098:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.16.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1), ), using_qnn:true] (%11098:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1)]) -> (%11099:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.16.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=695, solved=1)), using_qnn:true] (%11095:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=689, solved=1)]) -> (%11100:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.16.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=697, solved=1)), using_qnn:true] (%11097:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=691, solved=1)]) -> (%11101:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.16.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), ), using_qnn:true] (%11100:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)]) -> (%11102:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.16.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), ), using_qnn:true] (%11100:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)]) -> (%11103:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.16.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1), ), using_qnn:true] (%11103:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)]) -> (%11104:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.16.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1), ), using_qnn:true] (%11104:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1)], %11102:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)]) -> (%11105:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.16.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1), ), using_qnn:true] (%11105:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11106:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.16.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), ), using_qnn:true] (%11100:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11107:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.16.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), ), using_qnn:true] (%11107:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)], %11106:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=698, solved=1)]) -> (%11108:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.16.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), ), using_qnn:true] (%11101:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)]) -> (%11109:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.16.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), ), using_qnn:true] (%11101:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)]) -> (%11110:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.16.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1), ), using_qnn:true] (%11110:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)]) -> (%11111:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.16.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1), ), using_qnn:true] (%11111:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1)], %11109:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)]) -> (%11112:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.16.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1), ), using_qnn:true] (%11112:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11113:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.16.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), ), using_qnn:true] (%11101:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11114:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.16.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), ), using_qnn:true] (%11114:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)], %11113:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=699, solved=1)]) -> (%11115:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.16.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=700, solved=1), ), using_qnn:true] (%11115:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=696, solved=1)]) -> (%11116:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=700, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.16.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=700, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1), ), using_qnn:true] (%11116:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=700, solved=1)]) -> (%11117:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.16.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1), ), using_qnn:true] (%11117:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)]) -> (%11119:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.16.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=702, solved=1), ), using_qnn:true] (%11099:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=693, solved=1)]) -> (%11120:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=702, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.16.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=702, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1), ), using_qnn:true] (%11120:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=702, solved=1)]) -> (%11121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.16.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1), ), using_qnn:true] (%9973:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)], %11119:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)]) -> (%11123:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.16.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1), ), using_qnn:true] (%9974:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)], %11121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1)]) -> (%11124:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.16.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1), ), using_qnn:true] (%11123:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)]) -> (%11125:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.16.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1), ), using_qnn:true] (%11124:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)]) -> (%11126:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.16.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1), ), using_qnn:true] (%11108:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=694, solved=1)], %11125:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=19, solved=1)]) -> (%11127:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.16.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=705, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1), ), using_qnn:true] (%11127:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1)], %11128:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=705, solved=1)]) -> (%11129:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.16.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=1), ), using_qnn:true] (%11129:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1)]) -> (%11130:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.16.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=707, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=1), ), using_qnn:true] (%11130:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=1)], %11131:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=707, solved=1)]) -> (%11132:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.16.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=708, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=709, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11133:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=708, solved=1), constant:[0]]) -> (%11134:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=709, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.16.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=709, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=710, solved=1), ), using_qnn:true] (%11134:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=709, solved=1)], %11129:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=704, solved=1)], %11132:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=706, solved=1)]) -> (%11135:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=710, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.16.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=710, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=711, solved=1), ), using_qnn:true] (%11135:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=710, solved=1)]) -> (%11136:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=711, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.16.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=711, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1), ), using_qnn:true] (%11136:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=711, solved=1)], %11126:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=55, solved=1)]) -> (%11137:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.16.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1), ), using_qnn:true] (%11137:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1)]) -> (%11138:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1), ), using_qnn:true] (%11138:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1)]) -> (%11139:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.16.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=713, solved=1)), using_qnn:true] (%11139:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=712, solved=1)]) -> (%11140:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=1), ), using_qnn:true] (%11140:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=1)]) -> (%11141:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.16.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11088:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11141:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=714, solved=1)]) -> (%11142:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.16.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=716, solved=1)), using_qnn:true] (%11142:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11143:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1), ), using_qnn:true] (%11143:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1)]) -> (%11144:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.16.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=717, solved=1)), using_qnn:true] (%11144:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1)]) -> (%11145:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=1), ), using_qnn:true] (%11145:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=1)]) -> (%11146:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.16.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=719, solved=1)), using_qnn:true] (%11144:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=715, solved=1)]) -> (%11147:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), ), using_qnn:true] (%11147:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)]) -> (%11148:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.16.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721, solved=1), ), using_qnn:true] (%11148:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)]) -> (%11149:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.16.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), ), using_qnn:true] (%11148:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)], %11149:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=721, solved=1)]) -> (%11150:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.16.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), ), using_qnn:true] (%11150:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)], %11146:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=718, solved=1)]) -> (%11151:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), ), using_qnn:true] (%11151:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)]) -> (%11152:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.16.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=722, solved=1)), using_qnn:true] (%11152:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=720, solved=1)]) -> (%11153:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.16.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=1), ), using_qnn:true] (%11153:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=1)]) -> (%11154:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.16.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11142:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11154:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=723, solved=1)]) -> (%11155:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.17.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=725, solved=1)), using_qnn:true] (%11155:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11156:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1), ), using_qnn:true] (%11156:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1)]) -> (%11157:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.17.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=726, solved=1)), using_qnn:true] (%11157:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1)]) -> (%11158:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.17.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=728, solved=1)), using_qnn:true] (%11157:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1)]) -> (%11159:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.17.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=730, solved=1)), using_qnn:true] (%11157:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=724, solved=1)]) -> (%11160:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1), ), using_qnn:true] (%11158:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1)]) -> (%11161:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.17.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1), ), using_qnn:true] (%11161:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1)]) -> (%11162:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1), ), using_qnn:true] (%11159:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1)]) -> (%11163:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.17.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1), ), using_qnn:true] (%11163:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1)]) -> (%11164:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1), ), using_qnn:true] (%11160:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1)]) -> (%11165:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.17.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1), ), using_qnn:true] (%11165:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1)]) -> (%11166:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.17.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=733, solved=1)), using_qnn:true] (%11162:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=727, solved=1)]) -> (%11167:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.17.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=735, solved=1)), using_qnn:true] (%11164:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=729, solved=1)]) -> (%11168:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.17.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), ), using_qnn:true] (%11167:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)]) -> (%11169:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.17.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), ), using_qnn:true] (%11167:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)]) -> (%11170:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.17.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1), ), using_qnn:true] (%11170:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)]) -> (%11171:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.17.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1), ), using_qnn:true] (%11171:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1)], %11169:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)]) -> (%11172:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.17.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1), ), using_qnn:true] (%11172:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11173:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.17.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), ), using_qnn:true] (%11167:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11174:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.17.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), ), using_qnn:true] (%11174:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)], %11173:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=736, solved=1)]) -> (%11175:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.17.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), ), using_qnn:true] (%11168:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)]) -> (%11176:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.17.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), ), using_qnn:true] (%11168:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)]) -> (%11177:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.17.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1), ), using_qnn:true] (%11177:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)]) -> (%11178:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.17.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1), ), using_qnn:true] (%11178:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1)], %11176:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)]) -> (%11179:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.17.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1), ), using_qnn:true] (%11179:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11180:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.17.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), ), using_qnn:true] (%11168:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11181:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.17.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), ), using_qnn:true] (%11181:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)], %11180:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=737, solved=1)]) -> (%11182:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.17.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=738, solved=1), ), using_qnn:true] (%11182:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=734, solved=1)]) -> (%11183:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=738, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.17.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=738, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1), ), using_qnn:true] (%11183:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=738, solved=1)]) -> (%11184:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.17.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1), ), using_qnn:true] (%11184:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1)]) -> (%11186:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.17.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=740, solved=1), ), using_qnn:true] (%11166:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=731, solved=1)]) -> (%11187:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=740, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.17.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=740, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1), ), using_qnn:true] (%11187:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=740, solved=1)]) -> (%11188:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.17.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1), ), using_qnn:true] (%9975:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)], %11186:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=739, solved=1)]) -> (%11190:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.17.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1), ), using_qnn:true] (%9976:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)], %11188:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1)]) -> (%11191:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.17.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1), ), using_qnn:true] (%11190:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)]) -> (%11192:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.17.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1), ), using_qnn:true] (%11191:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)]) -> (%11193:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.17.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1), ), using_qnn:true] (%11175:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=732, solved=1)], %11192:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=20, solved=1)]) -> (%11194:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.17.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=743, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1), ), using_qnn:true] (%11194:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1)], %11195:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=743, solved=1)]) -> (%11196:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.17.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=744, solved=1), ), using_qnn:true] (%11196:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1)]) -> (%11197:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=744, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.17.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=744, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=745, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=744, solved=1), ), using_qnn:true] (%11197:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=744, solved=1)], %11198:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=745, solved=1)]) -> (%11199:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=744, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.17.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=746, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=747, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11200:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=746, solved=1), constant:[0]]) -> (%11201:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=747, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.17.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=747, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=744, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=748, solved=1), ), using_qnn:true] (%11201:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=747, solved=1)], %11196:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=742, solved=1)], %11199:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=744, solved=1)]) -> (%11202:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=748, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.17.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=748, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=1), ), using_qnn:true] (%11202:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=748, solved=1)]) -> (%11203:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.17.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1), ), using_qnn:true] (%11203:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=749, solved=1)], %11193:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=56, solved=1)]) -> (%11204:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.17.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1), ), using_qnn:true] (%11204:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1)]) -> (%11205:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1), ), using_qnn:true] (%11205:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1)]) -> (%11206:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.17.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=752, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=751, solved=1)), using_qnn:true] (%11206:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=750, solved=1)]) -> (%11207:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=752, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=752, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=752, solved=1), ), using_qnn:true] (%11207:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=752, solved=1)]) -> (%11208:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=752, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.17.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=752, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11155:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11208:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=752, solved=1)]) -> (%11209:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.17.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=754, solved=1)), using_qnn:true] (%11209:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11210:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1), ), using_qnn:true] (%11210:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1)]) -> (%11211:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.17.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=755, solved=1)), using_qnn:true] (%11211:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1)]) -> (%11212:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=1), ), using_qnn:true] (%11212:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=1)]) -> (%11213:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.17.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=757, solved=1)), using_qnn:true] (%11211:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=753, solved=1)]) -> (%11214:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), ), using_qnn:true] (%11214:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)]) -> (%11215:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.17.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=759, solved=1), ), using_qnn:true] (%11215:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)]) -> (%11216:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=759, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.17.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=759, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), ), using_qnn:true] (%11215:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)], %11216:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=759, solved=1)]) -> (%11217:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.17.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), ), using_qnn:true] (%11217:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)], %11213:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=756, solved=1)]) -> (%11218:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), ), using_qnn:true] (%11218:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)]) -> (%11219:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.17.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=761, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=760, solved=1)), using_qnn:true] (%11219:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=758, solved=1)]) -> (%11220:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=761, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.17.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=761, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=761, solved=1), ), using_qnn:true] (%11220:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=761, solved=1)]) -> (%11221:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=761, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.17.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=761, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11209:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11221:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=761, solved=1)]) -> (%11222:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.18.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=763, solved=1)), using_qnn:true] (%11222:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11223:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1), ), using_qnn:true] (%11223:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1)]) -> (%11224:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.18.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=764, solved=1)), using_qnn:true] (%11224:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1)]) -> (%11225:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.18.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=766, solved=1)), using_qnn:true] (%11224:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1)]) -> (%11226:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.18.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=768, solved=1)), using_qnn:true] (%11224:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=762, solved=1)]) -> (%11227:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1), ), using_qnn:true] (%11225:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1)]) -> (%11228:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.18.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1), ), using_qnn:true] (%11228:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1)]) -> (%11229:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1), ), using_qnn:true] (%11226:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1)]) -> (%11230:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.18.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1), ), using_qnn:true] (%11230:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1)]) -> (%11231:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1), ), using_qnn:true] (%11227:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1)]) -> (%11232:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.18.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1), ), using_qnn:true] (%11232:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1)]) -> (%11233:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.18.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=771, solved=1)), using_qnn:true] (%11229:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=765, solved=1)]) -> (%11234:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.18.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=773, solved=1)), using_qnn:true] (%11231:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=767, solved=1)]) -> (%11235:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.18.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), ), using_qnn:true] (%11234:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)]) -> (%11236:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.18.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), ), using_qnn:true] (%11234:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)]) -> (%11237:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.18.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1), ), using_qnn:true] (%11237:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)]) -> (%11238:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.18.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1), ), using_qnn:true] (%11238:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1)], %11236:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)]) -> (%11239:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.18.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1), ), using_qnn:true] (%11239:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11240:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.18.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), ), using_qnn:true] (%11234:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11241:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.18.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), ), using_qnn:true] (%11241:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)], %11240:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=774, solved=1)]) -> (%11242:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.18.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), ), using_qnn:true] (%11235:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)]) -> (%11243:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.18.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), ), using_qnn:true] (%11235:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)]) -> (%11244:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.18.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1), ), using_qnn:true] (%11244:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)]) -> (%11245:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.18.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1), ), using_qnn:true] (%11245:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1)], %11243:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)]) -> (%11246:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.18.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1), ), using_qnn:true] (%11246:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11247:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.18.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), ), using_qnn:true] (%11235:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11248:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.18.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), ), using_qnn:true] (%11248:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)], %11247:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=775, solved=1)]) -> (%11249:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.18.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=776, solved=1), ), using_qnn:true] (%11249:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=772, solved=1)]) -> (%11250:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=776, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.18.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=776, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1), ), using_qnn:true] (%11250:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=776, solved=1)]) -> (%11251:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.18.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1), ), using_qnn:true] (%11251:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)]) -> (%11253:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.18.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=778, solved=1), ), using_qnn:true] (%11233:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=769, solved=1)]) -> (%11254:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=778, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.18.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=778, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1), ), using_qnn:true] (%11254:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=778, solved=1)]) -> (%11255:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.18.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1), ), using_qnn:true] (%9977:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)], %11253:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)]) -> (%11257:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.18.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1), ), using_qnn:true] (%9978:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)], %11255:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1)]) -> (%11258:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.18.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1), ), using_qnn:true] (%11257:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)]) -> (%11259:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.18.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1), ), using_qnn:true] (%11258:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)]) -> (%11260:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.18.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1), ), using_qnn:true] (%11242:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=770, solved=1)], %11259:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=21, solved=1)]) -> (%11261:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.18.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=781, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1), ), using_qnn:true] (%11261:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1)], %11262:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=781, solved=1)]) -> (%11263:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.18.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=1), ), using_qnn:true] (%11263:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1)]) -> (%11264:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.18.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=783, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=1), ), using_qnn:true] (%11264:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=1)], %11265:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=783, solved=1)]) -> (%11266:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.18.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=784, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=785, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11267:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=784, solved=1), constant:[0]]) -> (%11268:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=785, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.18.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=785, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=786, solved=1), ), using_qnn:true] (%11268:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=785, solved=1)], %11263:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=780, solved=1)], %11266:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=782, solved=1)]) -> (%11269:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=786, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.18.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=786, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=787, solved=1), ), using_qnn:true] (%11269:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=786, solved=1)]) -> (%11270:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=787, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.18.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=787, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1), ), using_qnn:true] (%11270:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=787, solved=1)], %11260:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=57, solved=1)]) -> (%11271:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.18.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1), ), using_qnn:true] (%11271:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1)]) -> (%11272:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1), ), using_qnn:true] (%11272:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1)]) -> (%11273:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.18.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=790, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=789, solved=1)), using_qnn:true] (%11273:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=788, solved=1)]) -> (%11274:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=790, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=790, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=790, solved=1), ), using_qnn:true] (%11274:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=790, solved=1)]) -> (%11275:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=790, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.18.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=790, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11222:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11275:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=790, solved=1)]) -> (%11276:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.18.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=792, solved=1)), using_qnn:true] (%11276:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11277:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1), ), using_qnn:true] (%11277:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1)]) -> (%11278:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.18.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=794, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=793, solved=1)), using_qnn:true] (%11278:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1)]) -> (%11279:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=794, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=794, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=794, solved=1), ), using_qnn:true] (%11279:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=794, solved=1)]) -> (%11280:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=794, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.18.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=795, solved=1)), using_qnn:true] (%11278:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=791, solved=1)]) -> (%11281:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), ), using_qnn:true] (%11281:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)]) -> (%11282:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.18.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=1), ), using_qnn:true] (%11282:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)]) -> (%11283:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.18.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), ), using_qnn:true] (%11282:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)], %11283:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=797, solved=1)]) -> (%11284:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.18.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=794, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), ), using_qnn:true] (%11284:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)], %11280:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=794, solved=1)]) -> (%11285:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), ), using_qnn:true] (%11285:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)]) -> (%11286:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.18.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=799, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=798, solved=1)), using_qnn:true] (%11286:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=796, solved=1)]) -> (%11287:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=799, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.18.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=799, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=799, solved=1), ), using_qnn:true] (%11287:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=799, solved=1)]) -> (%11288:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=799, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.18.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=799, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11276:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11288:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=799, solved=1)]) -> (%11289:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.19.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=801, solved=1)), using_qnn:true] (%11289:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11290:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1), ), using_qnn:true] (%11290:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1)]) -> (%11291:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.19.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=802, solved=1)), using_qnn:true] (%11291:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1)]) -> (%11292:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.19.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=804, solved=1)), using_qnn:true] (%11291:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1)]) -> (%11293:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.19.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=806, solved=1)), using_qnn:true] (%11291:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=800, solved=1)]) -> (%11294:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1), ), using_qnn:true] (%11292:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1)]) -> (%11295:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.19.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1), ), using_qnn:true] (%11295:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1)]) -> (%11296:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1), ), using_qnn:true] (%11293:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1)]) -> (%11297:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.19.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1), ), using_qnn:true] (%11297:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1)]) -> (%11298:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1), ), using_qnn:true] (%11294:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1)]) -> (%11299:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.19.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1), ), using_qnn:true] (%11299:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1)]) -> (%11300:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.19.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=809, solved=1)), using_qnn:true] (%11296:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=803, solved=1)]) -> (%11301:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.19.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=811, solved=1)), using_qnn:true] (%11298:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=805, solved=1)]) -> (%11302:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.19.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), ), using_qnn:true] (%11301:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)]) -> (%11303:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.19.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), ), using_qnn:true] (%11301:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)]) -> (%11304:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.19.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1), ), using_qnn:true] (%11304:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)]) -> (%11305:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.19.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1), ), using_qnn:true] (%11305:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1)], %11303:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)]) -> (%11306:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.19.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1), ), using_qnn:true] (%11306:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11307:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.19.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), ), using_qnn:true] (%11301:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11308:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.19.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), ), using_qnn:true] (%11308:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)], %11307:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=812, solved=1)]) -> (%11309:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.19.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), ), using_qnn:true] (%11302:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)]) -> (%11310:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.19.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), ), using_qnn:true] (%11302:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)]) -> (%11311:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.19.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1), ), using_qnn:true] (%11311:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)]) -> (%11312:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.19.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1), ), using_qnn:true] (%11312:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1)], %11310:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)]) -> (%11313:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.19.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1), ), using_qnn:true] (%11313:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11314:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.19.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), ), using_qnn:true] (%11302:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11315:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.19.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), ), using_qnn:true] (%11315:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)], %11314:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=813, solved=1)]) -> (%11316:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.19.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=814, solved=1), ), using_qnn:true] (%11316:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=810, solved=1)]) -> (%11317:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=814, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.19.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=814, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1), ), using_qnn:true] (%11317:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=814, solved=1)]) -> (%11318:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.19.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1), ), using_qnn:true] (%11318:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)]) -> (%11320:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.19.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=816, solved=1), ), using_qnn:true] (%11300:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=807, solved=1)]) -> (%11321:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=816, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.19.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=816, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, solved=1), ), using_qnn:true] (%11321:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=816, solved=1)]) -> (%11322:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.19.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1), ), using_qnn:true] (%9979:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)], %11320:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)]) -> (%11324:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.19.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1), ), using_qnn:true] (%9980:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)], %11322:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, solved=1)]) -> (%11325:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.19.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1), ), using_qnn:true] (%11324:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)]) -> (%11326:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.19.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1), ), using_qnn:true] (%11325:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)]) -> (%11327:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.19.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1), ), using_qnn:true] (%11309:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=808, solved=1)], %11326:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=22, solved=1)]) -> (%11328:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.19.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1), ), using_qnn:true] (%11328:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1)], %11329:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=819, solved=1)]) -> (%11330:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.19.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=1), ), using_qnn:true] (%11330:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1)]) -> (%11331:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.19.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=821, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=1), ), using_qnn:true] (%11331:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=1)], %11332:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=821, solved=1)]) -> (%11333:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.19.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=822, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=823, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11334:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=822, solved=1), constant:[0]]) -> (%11335:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=823, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.19.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=823, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=1), ), using_qnn:true] (%11335:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=823, solved=1)], %11330:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=818, solved=1)], %11333:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=820, solved=1)]) -> (%11336:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.19.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=825, solved=1), ), using_qnn:true] (%11336:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=824, solved=1)]) -> (%11337:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=825, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.19.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=825, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1), ), using_qnn:true] (%11337:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=825, solved=1)], %11327:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=58, solved=1)]) -> (%11338:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.19.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1), ), using_qnn:true] (%11338:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1)]) -> (%11339:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1), ), using_qnn:true] (%11339:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1)]) -> (%11340:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.19.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=827, solved=1)), using_qnn:true] (%11340:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=826, solved=1)]) -> (%11341:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=1), ), using_qnn:true] (%11341:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=1)]) -> (%11342:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.19.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11289:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11342:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=828, solved=1)]) -> (%11343:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.19.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=830, solved=1)), using_qnn:true] (%11343:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11344:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1), ), using_qnn:true] (%11344:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1)]) -> (%11345:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.19.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=831, solved=1)), using_qnn:true] (%11345:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1)]) -> (%11346:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=1), ), using_qnn:true] (%11346:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=1)]) -> (%11347:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.19.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=833, solved=1)), using_qnn:true] (%11345:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=829, solved=1)]) -> (%11348:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), ), using_qnn:true] (%11348:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)]) -> (%11349:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.19.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835, solved=1), ), using_qnn:true] (%11349:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)]) -> (%11350:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.19.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), ), using_qnn:true] (%11349:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)], %11350:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=835, solved=1)]) -> (%11351:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.19.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), ), using_qnn:true] (%11351:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)], %11347:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=832, solved=1)]) -> (%11352:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), ), using_qnn:true] (%11352:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)]) -> (%11353:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.19.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=836, solved=1)), using_qnn:true] (%11353:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=834, solved=1)]) -> (%11354:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.19.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=1), ), using_qnn:true] (%11354:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=1)]) -> (%11355:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.19.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11343:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11355:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=837, solved=1)]) -> (%11356:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.20.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=839, solved=1)), using_qnn:true] (%11356:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11357:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1), ), using_qnn:true] (%11357:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1)]) -> (%11358:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.20.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=840, solved=1)), using_qnn:true] (%11358:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1)]) -> (%11359:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.20.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=842, solved=1)), using_qnn:true] (%11358:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1)]) -> (%11360:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.20.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=844, solved=1)), using_qnn:true] (%11358:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=838, solved=1)]) -> (%11361:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1), ), using_qnn:true] (%11359:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1)]) -> (%11362:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.20.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1), ), using_qnn:true] (%11362:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1)]) -> (%11363:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1), ), using_qnn:true] (%11360:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1)]) -> (%11364:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.20.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1), ), using_qnn:true] (%11364:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1)]) -> (%11365:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1), ), using_qnn:true] (%11361:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1)]) -> (%11366:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.20.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1), ), using_qnn:true] (%11366:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1)]) -> (%11367:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.20.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=847, solved=1)), using_qnn:true] (%11363:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=841, solved=1)]) -> (%11368:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.20.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=849, solved=1)), using_qnn:true] (%11365:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=843, solved=1)]) -> (%11369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.20.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), ), using_qnn:true] (%11368:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)]) -> (%11370:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.20.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), ), using_qnn:true] (%11368:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)]) -> (%11371:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.20.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1), ), using_qnn:true] (%11371:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)]) -> (%11372:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.20.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1), ), using_qnn:true] (%11372:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1)], %11370:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)]) -> (%11373:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.20.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1), ), using_qnn:true] (%11373:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11374:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.20.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), ), using_qnn:true] (%11368:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11375:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.20.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), ), using_qnn:true] (%11375:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)], %11374:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=850, solved=1)]) -> (%11376:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.20.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), ), using_qnn:true] (%11369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)]) -> (%11377:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.20.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), ), using_qnn:true] (%11369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)]) -> (%11378:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.20.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1), ), using_qnn:true] (%11378:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)]) -> (%11379:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.20.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1), ), using_qnn:true] (%11379:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1)], %11377:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)]) -> (%11380:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.20.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1), ), using_qnn:true] (%11380:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.20.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), ), using_qnn:true] (%11369:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11382:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.20.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), ), using_qnn:true] (%11382:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)], %11381:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=851, solved=1)]) -> (%11383:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.20.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=852, solved=1), ), using_qnn:true] (%11383:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=848, solved=1)]) -> (%11384:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=852, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.20.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=852, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1), ), using_qnn:true] (%11384:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=852, solved=1)]) -> (%11385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.20.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1), ), using_qnn:true] (%11385:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)]) -> (%11387:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.20.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=854, solved=1), ), using_qnn:true] (%11367:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=845, solved=1)]) -> (%11388:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=854, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.20.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=854, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1), ), using_qnn:true] (%11388:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=854, solved=1)]) -> (%11389:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.20.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1), ), using_qnn:true] (%9981:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)], %11387:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)]) -> (%11391:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.20.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1), ), using_qnn:true] (%9982:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)], %11389:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1)]) -> (%11392:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.20.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1), ), using_qnn:true] (%11391:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)]) -> (%11393:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.20.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1), ), using_qnn:true] (%11392:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)]) -> (%11394:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.20.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1), ), using_qnn:true] (%11376:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=846, solved=1)], %11393:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=23, solved=1)]) -> (%11395:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.20.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=857, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1), ), using_qnn:true] (%11395:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1)], %11396:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=857, solved=1)]) -> (%11397:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.20.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=1), ), using_qnn:true] (%11397:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1)]) -> (%11398:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.20.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=1), ), using_qnn:true] (%11398:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=1)], %11399:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=859, solved=1)]) -> (%11400:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.20.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=860, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=861, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11401:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=860, solved=1), constant:[0]]) -> (%11402:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=861, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.20.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=861, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=1), ), using_qnn:true] (%11402:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=861, solved=1)], %11397:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=856, solved=1)], %11400:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=858, solved=1)]) -> (%11403:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.20.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=863, solved=1), ), using_qnn:true] (%11403:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=862, solved=1)]) -> (%11404:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=863, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.20.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=863, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1), ), using_qnn:true] (%11404:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=863, solved=1)], %11394:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=59, solved=1)]) -> (%11405:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.20.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1), ), using_qnn:true] (%11405:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1)]) -> (%11406:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1), ), using_qnn:true] (%11406:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1)]) -> (%11407:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.20.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=866, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=865, solved=1)), using_qnn:true] (%11407:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=864, solved=1)]) -> (%11408:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=866, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=866, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=866, solved=1), ), using_qnn:true] (%11408:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=866, solved=1)]) -> (%11409:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=866, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.20.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=866, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11356:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11409:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=866, solved=1)]) -> (%11410:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.20.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=868, solved=1)), using_qnn:true] (%11410:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11411:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1), ), using_qnn:true] (%11411:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1)]) -> (%11412:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.20.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=869, solved=1)), using_qnn:true] (%11412:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1)]) -> (%11413:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=1), ), using_qnn:true] (%11413:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=1)]) -> (%11414:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.20.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=871, solved=1)), using_qnn:true] (%11412:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=867, solved=1)]) -> (%11415:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), ), using_qnn:true] (%11415:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)]) -> (%11416:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.20.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=873, solved=1), ), using_qnn:true] (%11416:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)]) -> (%11417:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=873, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.20.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=873, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), ), using_qnn:true] (%11416:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)], %11417:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=873, solved=1)]) -> (%11418:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.20.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), ), using_qnn:true] (%11418:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)], %11414:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=870, solved=1)]) -> (%11419:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), ), using_qnn:true] (%11419:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)]) -> (%11420:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.20.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=875, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=874, solved=1)), using_qnn:true] (%11420:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=872, solved=1)]) -> (%11421:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=875, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.20.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=875, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=875, solved=1), ), using_qnn:true] (%11421:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=875, solved=1)]) -> (%11422:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=875, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.20.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=875, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11410:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11422:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=875, solved=1)]) -> (%11423:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.21.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=877, solved=1)), using_qnn:true] (%11423:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11424:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1), ), using_qnn:true] (%11424:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1)]) -> (%11425:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.21.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=878, solved=1)), using_qnn:true] (%11425:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1)]) -> (%11426:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.21.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=880, solved=1)), using_qnn:true] (%11425:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1)]) -> (%11427:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.21.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=882, solved=1)), using_qnn:true] (%11425:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=876, solved=1)]) -> (%11428:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1), ), using_qnn:true] (%11426:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1)]) -> (%11429:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.21.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1), ), using_qnn:true] (%11429:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1)]) -> (%11430:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1), ), using_qnn:true] (%11427:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1)]) -> (%11431:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.21.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1), ), using_qnn:true] (%11431:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1)]) -> (%11432:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1), ), using_qnn:true] (%11428:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1)]) -> (%11433:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.21.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1), ), using_qnn:true] (%11433:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1)]) -> (%11434:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.21.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=885, solved=1)), using_qnn:true] (%11430:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=879, solved=1)]) -> (%11435:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.21.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=887, solved=1)), using_qnn:true] (%11432:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=881, solved=1)]) -> (%11436:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.21.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), ), using_qnn:true] (%11435:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)]) -> (%11437:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.21.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), ), using_qnn:true] (%11435:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)]) -> (%11438:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.21.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1), ), using_qnn:true] (%11438:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)]) -> (%11439:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.21.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1), ), using_qnn:true] (%11439:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1)], %11437:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)]) -> (%11440:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.21.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1), ), using_qnn:true] (%11440:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11441:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.21.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), ), using_qnn:true] (%11435:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11442:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.21.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), ), using_qnn:true] (%11442:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)], %11441:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=888, solved=1)]) -> (%11443:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.21.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), ), using_qnn:true] (%11436:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)]) -> (%11444:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.21.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), ), using_qnn:true] (%11436:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)]) -> (%11445:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.21.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1), ), using_qnn:true] (%11445:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)]) -> (%11446:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.21.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1), ), using_qnn:true] (%11446:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1)], %11444:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)]) -> (%11447:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.21.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1), ), using_qnn:true] (%11447:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11448:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.21.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), ), using_qnn:true] (%11436:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11449:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.21.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), ), using_qnn:true] (%11449:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)], %11448:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=889, solved=1)]) -> (%11450:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.21.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=890, solved=1), ), using_qnn:true] (%11450:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=886, solved=1)]) -> (%11451:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=890, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.21.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=890, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1), ), using_qnn:true] (%11451:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=890, solved=1)]) -> (%11452:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.21.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1), ), using_qnn:true] (%11452:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)]) -> (%11454:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.21.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=892, solved=1), ), using_qnn:true] (%11434:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=883, solved=1)]) -> (%11455:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=892, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.21.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=892, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1), ), using_qnn:true] (%11455:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=892, solved=1)]) -> (%11456:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.21.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1), ), using_qnn:true] (%9983:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)], %11454:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)]) -> (%11458:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.21.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1), ), using_qnn:true] (%9984:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)], %11456:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1)]) -> (%11459:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.21.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1), ), using_qnn:true] (%11458:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)]) -> (%11460:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.21.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1), ), using_qnn:true] (%11459:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)]) -> (%11461:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.21.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1), ), using_qnn:true] (%11443:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=884, solved=1)], %11460:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=24, solved=1)]) -> (%11462:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.21.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=895, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1), ), using_qnn:true] (%11462:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1)], %11463:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=895, solved=1)]) -> (%11464:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.21.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=1), ), using_qnn:true] (%11464:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1)]) -> (%11465:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.21.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=1), ), using_qnn:true] (%11465:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=1)], %11466:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=897, solved=1)]) -> (%11467:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.21.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=898, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=899, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11468:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=898, solved=1), constant:[0]]) -> (%11469:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=899, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.21.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=899, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900, solved=1), ), using_qnn:true] (%11469:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=899, solved=1)], %11464:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=894, solved=1)], %11467:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=896, solved=1)]) -> (%11470:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.21.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=901, solved=1), ), using_qnn:true] (%11470:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=900, solved=1)]) -> (%11471:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=901, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.21.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=901, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1), ), using_qnn:true] (%11471:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=901, solved=1)], %11461:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=60, solved=1)]) -> (%11472:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.21.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1), ), using_qnn:true] (%11472:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1)]) -> (%11473:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1), ), using_qnn:true] (%11473:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1)]) -> (%11474:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.21.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=904, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=903, solved=1)), using_qnn:true] (%11474:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=902, solved=1)]) -> (%11475:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=904, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=904, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=904, solved=1), ), using_qnn:true] (%11475:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=904, solved=1)]) -> (%11476:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=904, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.21.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=904, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11423:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11476:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=904, solved=1)]) -> (%11477:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.21.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=906, solved=1)), using_qnn:true] (%11477:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11478:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1), ), using_qnn:true] (%11478:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1)]) -> (%11479:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.21.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=908, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=907, solved=1)), using_qnn:true] (%11479:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1)]) -> (%11480:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=908, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=908, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=908, solved=1), ), using_qnn:true] (%11480:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=908, solved=1)]) -> (%11481:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=908, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.21.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=909, solved=1)), using_qnn:true] (%11479:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=905, solved=1)]) -> (%11482:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), ), using_qnn:true] (%11482:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)]) -> (%11483:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.21.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=1), ), using_qnn:true] (%11483:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)]) -> (%11484:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.21.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), ), using_qnn:true] (%11483:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)], %11484:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=911, solved=1)]) -> (%11485:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.21.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=908, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), ), using_qnn:true] (%11485:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)], %11481:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=908, solved=1)]) -> (%11486:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), ), using_qnn:true] (%11486:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)]) -> (%11487:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.21.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=912, solved=1)), using_qnn:true] (%11487:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=910, solved=1)]) -> (%11488:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.21.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=1), ), using_qnn:true] (%11488:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=1)]) -> (%11489:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.21.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11477:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11489:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=913, solved=1)]) -> (%11490:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.22.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=915, solved=1)), using_qnn:true] (%11490:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11491:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1), ), using_qnn:true] (%11491:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1)]) -> (%11492:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.22.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=916, solved=1)), using_qnn:true] (%11492:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1)]) -> (%11493:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.22.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=918, solved=1)), using_qnn:true] (%11492:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1)]) -> (%11494:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.22.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=920, solved=1)), using_qnn:true] (%11492:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=914, solved=1)]) -> (%11495:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1), ), using_qnn:true] (%11493:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1)]) -> (%11496:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.22.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1), ), using_qnn:true] (%11496:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1)]) -> (%11497:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1), ), using_qnn:true] (%11494:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1)]) -> (%11498:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.22.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1), ), using_qnn:true] (%11498:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1)]) -> (%11499:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1), ), using_qnn:true] (%11495:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1)]) -> (%11500:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.22.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1), ), using_qnn:true] (%11500:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1)]) -> (%11501:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.22.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=923, solved=1)), using_qnn:true] (%11497:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=917, solved=1)]) -> (%11502:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.22.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=925, solved=1)), using_qnn:true] (%11499:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=919, solved=1)]) -> (%11503:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.22.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), ), using_qnn:true] (%11502:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)]) -> (%11504:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.22.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), ), using_qnn:true] (%11502:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)]) -> (%11505:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.22.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1), ), using_qnn:true] (%11505:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)]) -> (%11506:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.22.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1), ), using_qnn:true] (%11506:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1)], %11504:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)]) -> (%11507:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.22.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1), ), using_qnn:true] (%11507:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11508:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.22.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), ), using_qnn:true] (%11502:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11509:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.22.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), ), using_qnn:true] (%11509:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)], %11508:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=926, solved=1)]) -> (%11510:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.22.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), ), using_qnn:true] (%11503:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)]) -> (%11511:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.22.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), ), using_qnn:true] (%11503:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)]) -> (%11512:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.22.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1), ), using_qnn:true] (%11512:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)]) -> (%11513:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.22.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1), ), using_qnn:true] (%11513:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1)], %11511:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)]) -> (%11514:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.22.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1), ), using_qnn:true] (%11514:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11515:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.22.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), ), using_qnn:true] (%11503:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11516:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.22.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), ), using_qnn:true] (%11516:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)], %11515:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=927, solved=1)]) -> (%11517:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.22.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=928, solved=1), ), using_qnn:true] (%11517:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=924, solved=1)]) -> (%11518:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=928, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.22.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=928, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1), ), using_qnn:true] (%11518:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=928, solved=1)]) -> (%11519:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.22.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1), ), using_qnn:true] (%11519:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)]) -> (%11521:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.22.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=930, solved=1), ), using_qnn:true] (%11501:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=921, solved=1)]) -> (%11522:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=930, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.22.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=930, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1), ), using_qnn:true] (%11522:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=930, solved=1)]) -> (%11523:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.22.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1), ), using_qnn:true] (%9985:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)], %11521:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)]) -> (%11525:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.22.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1), ), using_qnn:true] (%9986:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)], %11523:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1)]) -> (%11526:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.22.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1), ), using_qnn:true] (%11525:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)]) -> (%11527:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.22.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1), ), using_qnn:true] (%11526:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)]) -> (%11528:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.22.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1), ), using_qnn:true] (%11510:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=922, solved=1)], %11527:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=25, solved=1)]) -> (%11529:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.22.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=933, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1), ), using_qnn:true] (%11529:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1)], %11530:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=933, solved=1)]) -> (%11531:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.22.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=934, solved=1), ), using_qnn:true] (%11531:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1)]) -> (%11532:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=934, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.22.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=934, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=934, solved=1), ), using_qnn:true] (%11532:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=934, solved=1)], %11533:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=935, solved=1)]) -> (%11534:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=934, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.22.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=936, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=937, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11535:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=936, solved=1), constant:[0]]) -> (%11536:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=937, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.22.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=937, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=934, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938, solved=1), ), using_qnn:true] (%11536:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=937, solved=1)], %11531:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=932, solved=1)], %11534:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=934, solved=1)]) -> (%11537:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.22.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=939, solved=1), ), using_qnn:true] (%11537:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=938, solved=1)]) -> (%11538:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=939, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.22.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=939, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1), ), using_qnn:true] (%11538:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=939, solved=1)], %11528:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=61, solved=1)]) -> (%11539:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.22.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1), ), using_qnn:true] (%11539:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1)]) -> (%11540:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1), ), using_qnn:true] (%11540:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1)]) -> (%11541:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.22.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=941, solved=1)), using_qnn:true] (%11541:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=940, solved=1)]) -> (%11542:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=1), ), using_qnn:true] (%11542:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=1)]) -> (%11543:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.22.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11490:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11543:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=942, solved=1)]) -> (%11544:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.22.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=944, solved=1)), using_qnn:true] (%11544:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11545:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1), ), using_qnn:true] (%11545:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1)]) -> (%11546:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.22.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=945, solved=1)), using_qnn:true] (%11546:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1)]) -> (%11547:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=1), ), using_qnn:true] (%11547:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=1)]) -> (%11548:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.22.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=947, solved=1)), using_qnn:true] (%11546:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=943, solved=1)]) -> (%11549:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), ), using_qnn:true] (%11549:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)]) -> (%11550:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.22.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=1), ), using_qnn:true] (%11550:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)]) -> (%11551:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.22.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), ), using_qnn:true] (%11550:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)], %11551:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=949, solved=1)]) -> (%11552:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.22.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), ), using_qnn:true] (%11552:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)], %11548:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=946, solved=1)]) -> (%11553:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), ), using_qnn:true] (%11553:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)]) -> (%11554:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.22.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=950, solved=1)), using_qnn:true] (%11554:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=948, solved=1)]) -> (%11555:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.22.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=1), ), using_qnn:true] (%11555:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=1)]) -> (%11556:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.22.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11544:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11556:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=951, solved=1)]) -> (%11557:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.23.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=953, solved=1)), using_qnn:true] (%11557:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11558:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1), ), using_qnn:true] (%11558:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1)]) -> (%11559:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.23.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=954, solved=1)), using_qnn:true] (%11559:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1)]) -> (%11560:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.23.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=956, solved=1)), using_qnn:true] (%11559:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1)]) -> (%11561:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.23.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=958, solved=1)), using_qnn:true] (%11559:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=952, solved=1)]) -> (%11562:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1), ), using_qnn:true] (%11560:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1)]) -> (%11563:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.23.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1), ), using_qnn:true] (%11563:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1)]) -> (%11564:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1), ), using_qnn:true] (%11561:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1)]) -> (%11565:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.23.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1), ), using_qnn:true] (%11565:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1)]) -> (%11566:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1), ), using_qnn:true] (%11562:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1)]) -> (%11567:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.23.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1), ), using_qnn:true] (%11567:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1)]) -> (%11568:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.23.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=961, solved=1)), using_qnn:true] (%11564:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=955, solved=1)]) -> (%11569:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.23.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=963, solved=1)), using_qnn:true] (%11566:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=957, solved=1)]) -> (%11570:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.23.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), ), using_qnn:true] (%11569:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)]) -> (%11571:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.23.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), ), using_qnn:true] (%11569:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)]) -> (%11572:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.23.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1), ), using_qnn:true] (%11572:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)]) -> (%11573:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.23.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1), ), using_qnn:true] (%11573:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1)], %11571:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)]) -> (%11574:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.23.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1), ), using_qnn:true] (%11574:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11575:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.23.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), ), using_qnn:true] (%11569:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11576:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.23.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), ), using_qnn:true] (%11576:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)], %11575:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=964, solved=1)]) -> (%11577:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.23.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), ), using_qnn:true] (%11570:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)]) -> (%11578:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.23.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), ), using_qnn:true] (%11570:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)]) -> (%11579:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.23.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1), ), using_qnn:true] (%11579:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)]) -> (%11580:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.23.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1), ), using_qnn:true] (%11580:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1)], %11578:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)]) -> (%11581:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.23.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1), ), using_qnn:true] (%11581:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11582:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.23.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), ), using_qnn:true] (%11570:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11583:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.23.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), ), using_qnn:true] (%11583:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)], %11582:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=965, solved=1)]) -> (%11584:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.23.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=966, solved=1), ), using_qnn:true] (%11584:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=962, solved=1)]) -> (%11585:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=966, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.23.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=966, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1), ), using_qnn:true] (%11585:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=966, solved=1)]) -> (%11586:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.23.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1), ), using_qnn:true] (%11586:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)]) -> (%11588:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.23.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=968, solved=1), ), using_qnn:true] (%11568:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=959, solved=1)]) -> (%11589:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=968, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.23.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=968, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1), ), using_qnn:true] (%11589:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=968, solved=1)]) -> (%11590:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.23.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1), ), using_qnn:true] (%9987:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)], %11588:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)]) -> (%11592:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.23.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1), ), using_qnn:true] (%9988:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)], %11590:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1)]) -> (%11593:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.23.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1), ), using_qnn:true] (%11592:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)]) -> (%11594:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.23.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1), ), using_qnn:true] (%11593:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)]) -> (%11595:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.23.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1), ), using_qnn:true] (%11577:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=960, solved=1)], %11594:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=26, solved=1)]) -> (%11596:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.23.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=971, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1), ), using_qnn:true] (%11596:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1)], %11597:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=971, solved=1)]) -> (%11598:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.23.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=1), ), using_qnn:true] (%11598:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1)]) -> (%11599:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.23.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=1), ), using_qnn:true] (%11599:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=1)], %11600:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=973, solved=1)]) -> (%11601:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.23.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=974, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=975, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11602:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=974, solved=1), constant:[0]]) -> (%11603:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=975, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.23.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=975, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=1), ), using_qnn:true] (%11603:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=975, solved=1)], %11598:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=970, solved=1)], %11601:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=972, solved=1)]) -> (%11604:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.23.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=977, solved=1), ), using_qnn:true] (%11604:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=976, solved=1)]) -> (%11605:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=977, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.23.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=977, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1), ), using_qnn:true] (%11605:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=977, solved=1)], %11595:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=62, solved=1)]) -> (%11606:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.23.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1), ), using_qnn:true] (%11606:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1)]) -> (%11607:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1), ), using_qnn:true] (%11607:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1)]) -> (%11608:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.23.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=980, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=979, solved=1)), using_qnn:true] (%11608:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=978, solved=1)]) -> (%11609:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=980, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=980, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=980, solved=1), ), using_qnn:true] (%11609:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=980, solved=1)]) -> (%11610:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=980, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.23.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=980, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11557:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11610:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=980, solved=1)]) -> (%11611:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.23.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=982, solved=1)), using_qnn:true] (%11611:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11612:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1), ), using_qnn:true] (%11612:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1)]) -> (%11613:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.23.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=983, solved=1)), using_qnn:true] (%11613:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1)]) -> (%11614:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=1), ), using_qnn:true] (%11614:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=1)]) -> (%11615:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.23.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=985, solved=1)), using_qnn:true] (%11613:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=981, solved=1)]) -> (%11616:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), ), using_qnn:true] (%11616:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)]) -> (%11617:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.23.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=987, solved=1), ), using_qnn:true] (%11617:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)]) -> (%11618:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=987, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.23.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=987, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), ), using_qnn:true] (%11617:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)], %11618:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=987, solved=1)]) -> (%11619:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.23.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), ), using_qnn:true] (%11619:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)], %11615:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=984, solved=1)]) -> (%11620:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), ), using_qnn:true] (%11620:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)]) -> (%11621:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.23.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=989, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=988, solved=1)), using_qnn:true] (%11621:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=986, solved=1)]) -> (%11622:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=989, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.23.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=989, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=989, solved=1), ), using_qnn:true] (%11622:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=989, solved=1)]) -> (%11623:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=989, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.23.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=989, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11611:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11623:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=989, solved=1)]) -> (%11624:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.24.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=991, solved=1)), using_qnn:true] (%11624:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11625:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1), ), using_qnn:true] (%11625:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1)]) -> (%11626:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.24.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=992, solved=1)), using_qnn:true] (%11626:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1)]) -> (%11627:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.24.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=994, solved=1)), using_qnn:true] (%11626:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1)]) -> (%11628:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.24.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=996, solved=1)), using_qnn:true] (%11626:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=990, solved=1)]) -> (%11629:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1), ), using_qnn:true] (%11627:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1)]) -> (%11630:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.24.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1), ), using_qnn:true] (%11630:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1)]) -> (%11631:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1), ), using_qnn:true] (%11628:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1)]) -> (%11632:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.24.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1), ), using_qnn:true] (%11632:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1)]) -> (%11633:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1), ), using_qnn:true] (%11629:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1)]) -> (%11634:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.24.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1), ), using_qnn:true] (%11634:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1)]) -> (%11635:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.24.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=999, solved=1)), using_qnn:true] (%11631:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=993, solved=1)]) -> (%11636:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.24.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1001, solved=1)), using_qnn:true] (%11633:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=995, solved=1)]) -> (%11637:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.24.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), ), using_qnn:true] (%11636:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)]) -> (%11638:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.24.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), ), using_qnn:true] (%11636:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)]) -> (%11639:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.24.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1), ), using_qnn:true] (%11639:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)]) -> (%11640:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.24.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1), ), using_qnn:true] (%11640:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1)], %11638:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)]) -> (%11641:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.24.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1), ), using_qnn:true] (%11641:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11642:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.24.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), ), using_qnn:true] (%11636:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11643:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.24.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), ), using_qnn:true] (%11643:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)], %11642:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1002, solved=1)]) -> (%11644:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.24.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), ), using_qnn:true] (%11637:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)]) -> (%11645:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.24.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), ), using_qnn:true] (%11637:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)]) -> (%11646:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.24.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1), ), using_qnn:true] (%11646:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)]) -> (%11647:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.24.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1), ), using_qnn:true] (%11647:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1)], %11645:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)]) -> (%11648:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.24.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1), ), using_qnn:true] (%11648:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11649:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.24.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), ), using_qnn:true] (%11637:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11650:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.24.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), ), using_qnn:true] (%11650:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)], %11649:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1003, solved=1)]) -> (%11651:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.24.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1004, solved=1), ), using_qnn:true] (%11651:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1000, solved=1)]) -> (%11652:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1004, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.24.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1004, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1), ), using_qnn:true] (%11652:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1004, solved=1)]) -> (%11653:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.24.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1), ), using_qnn:true] (%11653:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)]) -> (%11655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.24.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1006, solved=1), ), using_qnn:true] (%11635:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=997, solved=1)]) -> (%11656:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1006, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.24.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1006, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1), ), using_qnn:true] (%11656:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1006, solved=1)]) -> (%11657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.24.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1), ), using_qnn:true] (%9989:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)], %11655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)]) -> (%11659:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.24.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1), ), using_qnn:true] (%9990:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)], %11657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1)]) -> (%11660:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.24.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1), ), using_qnn:true] (%11659:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)]) -> (%11661:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.24.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1), ), using_qnn:true] (%11660:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)]) -> (%11662:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.24.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1), ), using_qnn:true] (%11644:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=998, solved=1)], %11661:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=27, solved=1)]) -> (%11663:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.24.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1009, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1), ), using_qnn:true] (%11663:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1)], %11664:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1009, solved=1)]) -> (%11665:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.24.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=1), ), using_qnn:true] (%11665:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1)]) -> (%11666:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.24.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=1), ), using_qnn:true] (%11666:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=1)], %11667:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1011, solved=1)]) -> (%11668:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.24.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1012, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1013, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11669:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1012, solved=1), constant:[0]]) -> (%11670:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1013, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.24.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1013, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=1), ), using_qnn:true] (%11670:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1013, solved=1)], %11665:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1008, solved=1)], %11668:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1010, solved=1)]) -> (%11671:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.24.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1015, solved=1), ), using_qnn:true] (%11671:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1014, solved=1)]) -> (%11672:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1015, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.24.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1015, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1), ), using_qnn:true] (%11672:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1015, solved=1)], %11662:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=63, solved=1)]) -> (%11673:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.24.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1), ), using_qnn:true] (%11673:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1)]) -> (%11674:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1), ), using_qnn:true] (%11674:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1)]) -> (%11675:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.24.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1017, solved=1)), using_qnn:true] (%11675:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1016, solved=1)]) -> (%11676:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=1), ), using_qnn:true] (%11676:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=1)]) -> (%11677:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.24.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11624:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11677:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1018, solved=1)]) -> (%11678:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.24.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1020, solved=1)), using_qnn:true] (%11678:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11679:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1), ), using_qnn:true] (%11679:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1)]) -> (%11680:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.24.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1021, solved=1)), using_qnn:true] (%11680:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1)]) -> (%11681:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=1), ), using_qnn:true] (%11681:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=1)]) -> (%11682:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.24.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1023, solved=1)), using_qnn:true] (%11680:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1019, solved=1)]) -> (%11683:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), ), using_qnn:true] (%11683:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)]) -> (%11684:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.24.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1025, solved=1), ), using_qnn:true] (%11684:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)]) -> (%11685:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1025, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.24.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1025, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), ), using_qnn:true] (%11684:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)], %11685:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1025, solved=1)]) -> (%11686:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.24.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), ), using_qnn:true] (%11686:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)], %11682:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1022, solved=1)]) -> (%11687:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), ), using_qnn:true] (%11687:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)]) -> (%11688:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.24.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1026, solved=1)), using_qnn:true] (%11688:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1024, solved=1)]) -> (%11689:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.24.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=1), ), using_qnn:true] (%11689:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=1)]) -> (%11690:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.24.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11678:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11690:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1027, solved=1)]) -> (%11691:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.25.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1029, solved=1)), using_qnn:true] (%11691:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11692:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1), ), using_qnn:true] (%11692:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1)]) -> (%11693:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.25.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1030, solved=1)), using_qnn:true] (%11693:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1)]) -> (%11694:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.25.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1032, solved=1)), using_qnn:true] (%11693:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1)]) -> (%11695:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.25.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1034, solved=1)), using_qnn:true] (%11693:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1028, solved=1)]) -> (%11696:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1), ), using_qnn:true] (%11694:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1)]) -> (%11697:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.25.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1), ), using_qnn:true] (%11697:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1)]) -> (%11698:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1), ), using_qnn:true] (%11695:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1)]) -> (%11699:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.25.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1), ), using_qnn:true] (%11699:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1)]) -> (%11700:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1), ), using_qnn:true] (%11696:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1)]) -> (%11701:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.25.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1), ), using_qnn:true] (%11701:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1)]) -> (%11702:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.25.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1037, solved=1)), using_qnn:true] (%11698:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1031, solved=1)]) -> (%11703:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.25.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1039, solved=1)), using_qnn:true] (%11700:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1033, solved=1)]) -> (%11704:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.25.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), ), using_qnn:true] (%11703:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)]) -> (%11705:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.25.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), ), using_qnn:true] (%11703:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)]) -> (%11706:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.25.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1), ), using_qnn:true] (%11706:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)]) -> (%11707:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.25.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1), ), using_qnn:true] (%11707:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1)], %11705:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)]) -> (%11708:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.25.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1), ), using_qnn:true] (%11708:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11709:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.25.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), ), using_qnn:true] (%11703:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11710:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.25.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), ), using_qnn:true] (%11710:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)], %11709:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1040, solved=1)]) -> (%11711:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.25.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), ), using_qnn:true] (%11704:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)]) -> (%11712:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.25.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), ), using_qnn:true] (%11704:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)]) -> (%11713:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.25.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1), ), using_qnn:true] (%11713:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)]) -> (%11714:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.25.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1), ), using_qnn:true] (%11714:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1)], %11712:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)]) -> (%11715:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.25.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1), ), using_qnn:true] (%11715:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11716:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.25.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), ), using_qnn:true] (%11704:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11717:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.25.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), ), using_qnn:true] (%11717:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)], %11716:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1041, solved=1)]) -> (%11718:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.25.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1042, solved=1), ), using_qnn:true] (%11718:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1038, solved=1)]) -> (%11719:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1042, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.25.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1042, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1), ), using_qnn:true] (%11719:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1042, solved=1)]) -> (%11720:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.25.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1), ), using_qnn:true] (%11720:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)]) -> (%11722:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.25.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1044, solved=1), ), using_qnn:true] (%11702:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1035, solved=1)]) -> (%11723:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1044, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.25.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1044, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1), ), using_qnn:true] (%11723:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1044, solved=1)]) -> (%11724:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.25.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1), ), using_qnn:true] (%9991:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)], %11722:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)]) -> (%11726:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.25.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1), ), using_qnn:true] (%9992:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)], %11724:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1)]) -> (%11727:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.25.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1), ), using_qnn:true] (%11726:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)]) -> (%11728:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.25.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1), ), using_qnn:true] (%11727:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)]) -> (%11729:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.25.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1), ), using_qnn:true] (%11711:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1036, solved=1)], %11728:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=28, solved=1)]) -> (%11730:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.25.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1047, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1), ), using_qnn:true] (%11730:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1)], %11731:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1047, solved=1)]) -> (%11732:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.25.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1048, solved=1), ), using_qnn:true] (%11732:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1)]) -> (%11733:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1048, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.25.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1048, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1048, solved=1), ), using_qnn:true] (%11733:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1048, solved=1)], %11734:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1049, solved=1)]) -> (%11735:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1048, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.25.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1050, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1051, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11736:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1050, solved=1), constant:[0]]) -> (%11737:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1051, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.25.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1051, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1048, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1052, solved=1), ), using_qnn:true] (%11737:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1051, solved=1)], %11732:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1046, solved=1)], %11735:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1048, solved=1)]) -> (%11738:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1052, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.25.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1052, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1053, solved=1), ), using_qnn:true] (%11738:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1052, solved=1)]) -> (%11739:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1053, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.25.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1053, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1), ), using_qnn:true] (%11739:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1053, solved=1)], %11729:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=64, solved=1)]) -> (%11740:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.25.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1), ), using_qnn:true] (%11740:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1)]) -> (%11741:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1), ), using_qnn:true] (%11741:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1)]) -> (%11742:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.25.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1056, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1055, solved=1)), using_qnn:true] (%11742:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1054, solved=1)]) -> (%11743:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1056, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1056, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1056, solved=1), ), using_qnn:true] (%11743:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1056, solved=1)]) -> (%11744:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1056, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.25.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1056, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11691:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11744:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1056, solved=1)]) -> (%11745:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.25.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1058, solved=1)), using_qnn:true] (%11745:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11746:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1), ), using_qnn:true] (%11746:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1)]) -> (%11747:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.25.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1060, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1059, solved=1)), using_qnn:true] (%11747:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1)]) -> (%11748:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1060, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1060, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1060, solved=1), ), using_qnn:true] (%11748:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1060, solved=1)]) -> (%11749:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1060, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.25.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1061, solved=1)), using_qnn:true] (%11747:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1057, solved=1)]) -> (%11750:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), ), using_qnn:true] (%11750:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)]) -> (%11751:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.25.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1063, solved=1), ), using_qnn:true] (%11751:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)]) -> (%11752:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1063, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.25.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1063, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), ), using_qnn:true] (%11751:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)], %11752:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1063, solved=1)]) -> (%11753:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.25.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1060, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), ), using_qnn:true] (%11753:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)], %11749:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1060, solved=1)]) -> (%11754:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), ), using_qnn:true] (%11754:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)]) -> (%11755:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.25.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1065, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1064, solved=1)), using_qnn:true] (%11755:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1062, solved=1)]) -> (%11756:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1065, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.25.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1065, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1065, solved=1), ), using_qnn:true] (%11756:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1065, solved=1)]) -> (%11757:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1065, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.25.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1065, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11745:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11757:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1065, solved=1)]) -> (%11758:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.26.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1067, solved=1)), using_qnn:true] (%11758:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11759:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1), ), using_qnn:true] (%11759:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1)]) -> (%11760:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.26.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1068, solved=1)), using_qnn:true] (%11760:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1)]) -> (%11761:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.26.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1070, solved=1)), using_qnn:true] (%11760:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1)]) -> (%11762:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.26.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1072, solved=1)), using_qnn:true] (%11760:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1066, solved=1)]) -> (%11763:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1), ), using_qnn:true] (%11761:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1)]) -> (%11764:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.26.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1), ), using_qnn:true] (%11764:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1)]) -> (%11765:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1), ), using_qnn:true] (%11762:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1)]) -> (%11766:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.26.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1), ), using_qnn:true] (%11766:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1)]) -> (%11767:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1), ), using_qnn:true] (%11763:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1)]) -> (%11768:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.26.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1), ), using_qnn:true] (%11768:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1)]) -> (%11769:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.26.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1075, solved=1)), using_qnn:true] (%11765:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1069, solved=1)]) -> (%11770:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.26.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1077, solved=1)), using_qnn:true] (%11767:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1071, solved=1)]) -> (%11771:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.26.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), ), using_qnn:true] (%11770:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)]) -> (%11772:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.26.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), ), using_qnn:true] (%11770:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)]) -> (%11773:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.26.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1), ), using_qnn:true] (%11773:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)]) -> (%11774:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.26.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1), ), using_qnn:true] (%11774:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1)], %11772:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)]) -> (%11775:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.26.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1), ), using_qnn:true] (%11775:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11776:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.26.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), ), using_qnn:true] (%11770:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11777:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.26.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), ), using_qnn:true] (%11777:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)], %11776:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1078, solved=1)]) -> (%11778:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.26.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), ), using_qnn:true] (%11771:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)]) -> (%11779:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.26.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), ), using_qnn:true] (%11771:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)]) -> (%11780:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.26.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1), ), using_qnn:true] (%11780:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)]) -> (%11781:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.26.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1), ), using_qnn:true] (%11781:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1)], %11779:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)]) -> (%11782:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.26.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1), ), using_qnn:true] (%11782:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11783:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.26.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), ), using_qnn:true] (%11771:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11784:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.26.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), ), using_qnn:true] (%11784:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)], %11783:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1079, solved=1)]) -> (%11785:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.26.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1080, solved=1), ), using_qnn:true] (%11785:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1076, solved=1)]) -> (%11786:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1080, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.26.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1080, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1), ), using_qnn:true] (%11786:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1080, solved=1)]) -> (%11787:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.26.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1), ), using_qnn:true] (%11787:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)]) -> (%11789:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.26.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1082, solved=1), ), using_qnn:true] (%11769:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1073, solved=1)]) -> (%11790:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1082, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.26.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1082, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1), ), using_qnn:true] (%11790:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1082, solved=1)]) -> (%11791:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.26.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1), ), using_qnn:true] (%9993:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)], %11789:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)]) -> (%11793:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.26.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1), ), using_qnn:true] (%9994:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)], %11791:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1)]) -> (%11794:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.26.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1), ), using_qnn:true] (%11793:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)]) -> (%11795:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.26.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1), ), using_qnn:true] (%11794:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)]) -> (%11796:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.26.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1), ), using_qnn:true] (%11778:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1074, solved=1)], %11795:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=29, solved=1)]) -> (%11797:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.26.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1085, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1), ), using_qnn:true] (%11797:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1)], %11798:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1085, solved=1)]) -> (%11799:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.26.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1086, solved=1), ), using_qnn:true] (%11799:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1)]) -> (%11800:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1086, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.26.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1086, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1087, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1086, solved=1), ), using_qnn:true] (%11800:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1086, solved=1)], %11801:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1087, solved=1)]) -> (%11802:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1086, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.26.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1088, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1089, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11803:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1088, solved=1), constant:[0]]) -> (%11804:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1089, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.26.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1089, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1086, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1090, solved=1), ), using_qnn:true] (%11804:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1089, solved=1)], %11799:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1084, solved=1)], %11802:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1086, solved=1)]) -> (%11805:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1090, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.26.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1090, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1091, solved=1), ), using_qnn:true] (%11805:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1090, solved=1)]) -> (%11806:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1091, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.26.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1091, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1), ), using_qnn:true] (%11806:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1091, solved=1)], %11796:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=65, solved=1)]) -> (%11807:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.26.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1), ), using_qnn:true] (%11807:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1)]) -> (%11808:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1), ), using_qnn:true] (%11808:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1)]) -> (%11809:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.26.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1094, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1093, solved=1)), using_qnn:true] (%11809:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1092, solved=1)]) -> (%11810:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1094, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1094, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1094, solved=1), ), using_qnn:true] (%11810:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1094, solved=1)]) -> (%11811:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1094, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.26.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1094, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11758:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11811:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1094, solved=1)]) -> (%11812:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.26.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1096, solved=1)), using_qnn:true] (%11812:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11813:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1), ), using_qnn:true] (%11813:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1)]) -> (%11814:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.26.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1098, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1097, solved=1)), using_qnn:true] (%11814:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1)]) -> (%11815:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1098, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1098, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1098, solved=1), ), using_qnn:true] (%11815:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1098, solved=1)]) -> (%11816:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1098, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.26.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1099, solved=1)), using_qnn:true] (%11814:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1095, solved=1)]) -> (%11817:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), ), using_qnn:true] (%11817:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)]) -> (%11818:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.26.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1101, solved=1), ), using_qnn:true] (%11818:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)]) -> (%11819:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1101, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.26.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1101, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), ), using_qnn:true] (%11818:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)], %11819:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1101, solved=1)]) -> (%11820:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.26.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1098, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), ), using_qnn:true] (%11820:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)], %11816:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1098, solved=1)]) -> (%11821:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), ), using_qnn:true] (%11821:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)]) -> (%11822:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.26.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1103, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1102, solved=1)), using_qnn:true] (%11822:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1100, solved=1)]) -> (%11823:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1103, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.26.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1103, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1103, solved=1), ), using_qnn:true] (%11823:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1103, solved=1)]) -> (%11824:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1103, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.26.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1103, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11812:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11824:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1103, solved=1)]) -> (%11825:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.27.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1105, solved=1)), using_qnn:true] (%11825:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11826:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1), ), using_qnn:true] (%11826:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1)]) -> (%11827:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.27.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1106, solved=1)), using_qnn:true] (%11827:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1)]) -> (%11828:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.27.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1108, solved=1)), using_qnn:true] (%11827:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1)]) -> (%11829:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.27.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1110, solved=1)), using_qnn:true] (%11827:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1104, solved=1)]) -> (%11830:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1), ), using_qnn:true] (%11828:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1)]) -> (%11831:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.27.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1), ), using_qnn:true] (%11831:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1)]) -> (%11832:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1), ), using_qnn:true] (%11829:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1)]) -> (%11833:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.27.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1), ), using_qnn:true] (%11833:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1)]) -> (%11834:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1), ), using_qnn:true] (%11830:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1)]) -> (%11835:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.27.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1), ), using_qnn:true] (%11835:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1)]) -> (%11836:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.27.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1113, solved=1)), using_qnn:true] (%11832:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1107, solved=1)]) -> (%11837:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.27.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1115, solved=1)), using_qnn:true] (%11834:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1109, solved=1)]) -> (%11838:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.27.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), ), using_qnn:true] (%11837:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)]) -> (%11839:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.27.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), ), using_qnn:true] (%11837:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)]) -> (%11840:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.27.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1), ), using_qnn:true] (%11840:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)]) -> (%11841:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.27.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1), ), using_qnn:true] (%11841:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1)], %11839:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)]) -> (%11842:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.27.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1), ), using_qnn:true] (%11842:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11843:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.27.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), ), using_qnn:true] (%11837:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11844:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.27.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), ), using_qnn:true] (%11844:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)], %11843:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1116, solved=1)]) -> (%11845:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.27.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), ), using_qnn:true] (%11838:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)]) -> (%11846:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.27.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), ), using_qnn:true] (%11838:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)]) -> (%11847:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.27.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1), ), using_qnn:true] (%11847:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)]) -> (%11848:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.27.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1), ), using_qnn:true] (%11848:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1)], %11846:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)]) -> (%11849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.27.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1), ), using_qnn:true] (%11849:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11850:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.27.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), ), using_qnn:true] (%11838:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11851:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.27.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), ), using_qnn:true] (%11851:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)], %11850:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1117, solved=1)]) -> (%11852:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.27.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1118, solved=1), ), using_qnn:true] (%11852:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1114, solved=1)]) -> (%11853:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1118, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.27.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1118, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1), ), using_qnn:true] (%11853:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1118, solved=1)]) -> (%11854:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.27.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1), ), using_qnn:true] (%11854:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)]) -> (%11856:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.27.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1120, solved=1), ), using_qnn:true] (%11836:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1111, solved=1)]) -> (%11857:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1120, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.27.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1120, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1), ), using_qnn:true] (%11857:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1120, solved=1)]) -> (%11858:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.27.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1), ), using_qnn:true] (%9995:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)], %11856:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)]) -> (%11860:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.27.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1), ), using_qnn:true] (%9996:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)], %11858:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1)]) -> (%11861:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.27.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1), ), using_qnn:true] (%11860:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)]) -> (%11862:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.27.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1), ), using_qnn:true] (%11861:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)]) -> (%11863:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.27.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1), ), using_qnn:true] (%11845:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1112, solved=1)], %11862:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=30, solved=1)]) -> (%11864:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.27.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1123, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1), ), using_qnn:true] (%11864:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1)], %11865:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1123, solved=1)]) -> (%11866:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.27.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1124, solved=1), ), using_qnn:true] (%11866:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1)]) -> (%11867:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1124, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.27.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1124, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1125, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1124, solved=1), ), using_qnn:true] (%11867:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1124, solved=1)], %11868:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1125, solved=1)]) -> (%11869:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1124, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.27.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1126, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1127, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11870:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1126, solved=1), constant:[0]]) -> (%11871:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1127, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.27.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1127, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1124, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1128, solved=1), ), using_qnn:true] (%11871:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1127, solved=1)], %11866:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1122, solved=1)], %11869:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1124, solved=1)]) -> (%11872:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1128, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.27.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1128, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1129, solved=1), ), using_qnn:true] (%11872:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1128, solved=1)]) -> (%11873:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1129, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.27.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1129, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1), ), using_qnn:true] (%11873:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1129, solved=1)], %11863:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=66, solved=1)]) -> (%11874:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.27.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1), ), using_qnn:true] (%11874:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1)]) -> (%11875:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1), ), using_qnn:true] (%11875:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1)]) -> (%11876:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.27.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1132, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1131, solved=1)), using_qnn:true] (%11876:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1130, solved=1)]) -> (%11877:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1132, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1132, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1132, solved=1), ), using_qnn:true] (%11877:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1132, solved=1)]) -> (%11878:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1132, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.27.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1132, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11825:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11878:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1132, solved=1)]) -> (%11879:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.27.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1134, solved=1)), using_qnn:true] (%11879:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11880:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1), ), using_qnn:true] (%11880:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1)]) -> (%11881:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.27.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1136, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1135, solved=1)), using_qnn:true] (%11881:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1)]) -> (%11882:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1136, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1136, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1136, solved=1), ), using_qnn:true] (%11882:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1136, solved=1)]) -> (%11883:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1136, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.27.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1137, solved=1)), using_qnn:true] (%11881:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1133, solved=1)]) -> (%11884:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), ), using_qnn:true] (%11884:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)]) -> (%11885:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.27.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1139, solved=1), ), using_qnn:true] (%11885:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)]) -> (%11886:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1139, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.27.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1139, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), ), using_qnn:true] (%11885:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)], %11886:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1139, solved=1)]) -> (%11887:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.27.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1136, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), ), using_qnn:true] (%11887:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)], %11883:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1136, solved=1)]) -> (%11888:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), ), using_qnn:true] (%11888:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)]) -> (%11889:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.27.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1141, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1140, solved=1)), using_qnn:true] (%11889:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1138, solved=1)]) -> (%11890:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1141, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.27.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1141, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1141, solved=1), ), using_qnn:true] (%11890:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1141, solved=1)]) -> (%11891:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1141, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.27.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1141, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11879:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11891:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1141, solved=1)]) -> (%11892:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.28.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1143, solved=1)), using_qnn:true] (%11892:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11893:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1), ), using_qnn:true] (%11893:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1)]) -> (%11894:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.28.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1144, solved=1)), using_qnn:true] (%11894:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1)]) -> (%11895:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.28.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1146, solved=1)), using_qnn:true] (%11894:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1)]) -> (%11896:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.28.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1148, solved=1)), using_qnn:true] (%11894:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1142, solved=1)]) -> (%11897:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1), ), using_qnn:true] (%11895:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1)]) -> (%11898:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.28.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1), ), using_qnn:true] (%11898:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1)]) -> (%11899:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1), ), using_qnn:true] (%11896:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1)]) -> (%11900:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.28.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1), ), using_qnn:true] (%11900:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1)]) -> (%11901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1), ), using_qnn:true] (%11897:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1)]) -> (%11902:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.28.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1), ), using_qnn:true] (%11902:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1)]) -> (%11903:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.28.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1151, solved=1)), using_qnn:true] (%11899:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1145, solved=1)]) -> (%11904:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.28.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1153, solved=1)), using_qnn:true] (%11901:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1147, solved=1)]) -> (%11905:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.28.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), ), using_qnn:true] (%11904:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)]) -> (%11906:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.28.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), ), using_qnn:true] (%11904:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)]) -> (%11907:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.28.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1), ), using_qnn:true] (%11907:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)]) -> (%11908:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.28.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1), ), using_qnn:true] (%11908:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1)], %11906:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)]) -> (%11909:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.28.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1), ), using_qnn:true] (%11909:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11910:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.28.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), ), using_qnn:true] (%11904:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11911:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.28.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), ), using_qnn:true] (%11911:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)], %11910:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1154, solved=1)]) -> (%11912:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.28.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), ), using_qnn:true] (%11905:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)]) -> (%11913:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.28.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), ), using_qnn:true] (%11905:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)]) -> (%11914:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.28.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1), ), using_qnn:true] (%11914:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)]) -> (%11915:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.28.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1), ), using_qnn:true] (%11915:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1)], %11913:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)]) -> (%11916:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.28.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1), ), using_qnn:true] (%11916:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11917:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.28.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), ), using_qnn:true] (%11905:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11918:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.28.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), ), using_qnn:true] (%11918:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)], %11917:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1155, solved=1)]) -> (%11919:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.28.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1156, solved=1), ), using_qnn:true] (%11919:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1152, solved=1)]) -> (%11920:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1156, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.28.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1156, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1), ), using_qnn:true] (%11920:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1156, solved=1)]) -> (%11921:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.28.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1), ), using_qnn:true] (%11921:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)]) -> (%11923:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.28.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1158, solved=1), ), using_qnn:true] (%11903:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1149, solved=1)]) -> (%11924:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1158, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.28.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1158, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1), ), using_qnn:true] (%11924:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1158, solved=1)]) -> (%11925:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.28.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1), ), using_qnn:true] (%9997:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)], %11923:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)]) -> (%11927:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.28.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1), ), using_qnn:true] (%9998:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)], %11925:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1)]) -> (%11928:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.28.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1), ), using_qnn:true] (%11927:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)]) -> (%11929:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.28.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1), ), using_qnn:true] (%11928:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)]) -> (%11930:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.28.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1), ), using_qnn:true] (%11912:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1150, solved=1)], %11929:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=31, solved=1)]) -> (%11931:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.28.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1161, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1), ), using_qnn:true] (%11931:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1)], %11932:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1161, solved=1)]) -> (%11933:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.28.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1162, solved=1), ), using_qnn:true] (%11933:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1)]) -> (%11934:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1162, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.28.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1162, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1163, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1162, solved=1), ), using_qnn:true] (%11934:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1162, solved=1)], %11935:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1163, solved=1)]) -> (%11936:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1162, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.28.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1164, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1165, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %11937:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1164, solved=1), constant:[0]]) -> (%11938:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1165, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.28.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1165, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1162, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1166, solved=1), ), using_qnn:true] (%11938:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1165, solved=1)], %11933:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1160, solved=1)], %11936:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1162, solved=1)]) -> (%11939:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1166, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.28.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1166, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1167, solved=1), ), using_qnn:true] (%11939:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1166, solved=1)]) -> (%11940:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1167, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.28.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1167, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1), ), using_qnn:true] (%11940:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1167, solved=1)], %11930:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=67, solved=1)]) -> (%11941:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.28.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1), ), using_qnn:true] (%11941:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1)]) -> (%11942:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1), ), using_qnn:true] (%11942:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1)]) -> (%11943:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.28.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1170, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1169, solved=1)), using_qnn:true] (%11943:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1168, solved=1)]) -> (%11944:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1170, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1170, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1170, solved=1), ), using_qnn:true] (%11944:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1170, solved=1)]) -> (%11945:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1170, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.28.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1170, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11892:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11945:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1170, solved=1)]) -> (%11946:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.28.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1172, solved=1)), using_qnn:true] (%11946:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11947:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1), ), using_qnn:true] (%11947:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1)]) -> (%11948:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.28.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1174, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1173, solved=1)), using_qnn:true] (%11948:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1)]) -> (%11949:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1174, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1174, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1174, solved=1), ), using_qnn:true] (%11949:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1174, solved=1)]) -> (%11950:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1174, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.28.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1175, solved=1)), using_qnn:true] (%11948:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1171, solved=1)]) -> (%11951:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), ), using_qnn:true] (%11951:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)]) -> (%11952:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.28.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1177, solved=1), ), using_qnn:true] (%11952:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)]) -> (%11953:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1177, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.28.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1177, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), ), using_qnn:true] (%11952:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)], %11953:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1177, solved=1)]) -> (%11954:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.28.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1174, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), ), using_qnn:true] (%11954:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)], %11950:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1174, solved=1)]) -> (%11955:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), ), using_qnn:true] (%11955:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)]) -> (%11956:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.28.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1179, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1178, solved=1)), using_qnn:true] (%11956:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1176, solved=1)]) -> (%11957:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1179, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.28.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1179, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1179, solved=1), ), using_qnn:true] (%11957:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1179, solved=1)]) -> (%11958:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1179, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.28.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1179, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11946:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %11958:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1179, solved=1)]) -> (%11959:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.29.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1181, solved=1)), using_qnn:true] (%11959:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%11960:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1), ), using_qnn:true] (%11960:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1)]) -> (%11961:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.29.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1182, solved=1)), using_qnn:true] (%11961:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1)]) -> (%11962:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.29.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1184, solved=1)), using_qnn:true] (%11961:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1)]) -> (%11963:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.29.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1186, solved=1)), using_qnn:true] (%11961:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1180, solved=1)]) -> (%11964:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1), ), using_qnn:true] (%11962:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1)]) -> (%11965:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.29.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1), ), using_qnn:true] (%11965:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1)]) -> (%11966:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1), ), using_qnn:true] (%11963:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1)]) -> (%11967:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.29.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1), ), using_qnn:true] (%11967:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1)]) -> (%11968:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1), ), using_qnn:true] (%11964:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1)]) -> (%11969:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.29.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1), ), using_qnn:true] (%11969:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1)]) -> (%11970:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.29.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1189, solved=1)), using_qnn:true] (%11966:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1183, solved=1)]) -> (%11971:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.29.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1191, solved=1)), using_qnn:true] (%11968:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1185, solved=1)]) -> (%11972:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.29.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), ), using_qnn:true] (%11971:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)]) -> (%11973:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.29.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), ), using_qnn:true] (%11971:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)]) -> (%11974:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.29.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1), ), using_qnn:true] (%11974:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)]) -> (%11975:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.29.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1), ), using_qnn:true] (%11975:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1)], %11973:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)]) -> (%11976:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.29.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1), ), using_qnn:true] (%11976:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11977:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.29.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), ), using_qnn:true] (%11971:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11978:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.29.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), ), using_qnn:true] (%11978:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)], %11977:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1192, solved=1)]) -> (%11979:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.29.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), ), using_qnn:true] (%11972:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)]) -> (%11980:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.29.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), ), using_qnn:true] (%11972:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)]) -> (%11981:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.29.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1), ), using_qnn:true] (%11981:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)]) -> (%11982:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.29.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1), ), using_qnn:true] (%11982:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1)], %11980:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)]) -> (%11983:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.29.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1), ), using_qnn:true] (%11983:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%11984:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.29.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), ), using_qnn:true] (%11972:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%11985:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.29.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), ), using_qnn:true] (%11985:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)], %11984:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1193, solved=1)]) -> (%11986:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.29.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1194, solved=1), ), using_qnn:true] (%11986:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1190, solved=1)]) -> (%11987:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1194, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.29.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1194, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1), ), using_qnn:true] (%11987:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1194, solved=1)]) -> (%11988:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.29.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1), ), using_qnn:true] (%11988:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)]) -> (%11990:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.29.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1196, solved=1), ), using_qnn:true] (%11970:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1187, solved=1)]) -> (%11991:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1196, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.29.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1196, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1), ), using_qnn:true] (%11991:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1196, solved=1)]) -> (%11992:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.29.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1), ), using_qnn:true] (%9999:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)], %11990:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)]) -> (%11994:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.29.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1), ), using_qnn:true] (%10000:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1)], %11992:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1)]) -> (%11995:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.29.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1), ), using_qnn:true] (%11994:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)]) -> (%11996:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.29.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1), ), using_qnn:true] (%11995:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1)]) -> (%11997:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.29.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1), ), using_qnn:true] (%11979:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1188, solved=1)], %11996:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=32, solved=1)]) -> (%11998:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.29.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1199, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1), ), using_qnn:true] (%11998:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1)], %11999:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1199, solved=1)]) -> (%12000:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.29.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1200, solved=1), ), using_qnn:true] (%12000:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1)]) -> (%12001:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1200, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.29.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1200, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1201, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1200, solved=1), ), using_qnn:true] (%12001:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1200, solved=1)], %12002:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1201, solved=1)]) -> (%12003:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1200, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.29.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1202, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1203, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %12004:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1202, solved=1), constant:[0]]) -> (%12005:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1203, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.29.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1203, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1200, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1204, solved=1), ), using_qnn:true] (%12005:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1203, solved=1)], %12000:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1198, solved=1)], %12003:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1200, solved=1)]) -> (%12006:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1204, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.29.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1204, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1205, solved=1), ), using_qnn:true] (%12006:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1204, solved=1)]) -> (%12007:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1205, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.29.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1205, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1), ), using_qnn:true] (%12007:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1205, solved=1)], %11997:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=68, solved=1)]) -> (%12008:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.29.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1), ), using_qnn:true] (%12008:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1)]) -> (%12009:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1), ), using_qnn:true] (%12009:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1)]) -> (%12010:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.29.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1208, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1207, solved=1)), using_qnn:true] (%12010:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1206, solved=1)]) -> (%12011:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1208, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1208, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1208, solved=1), ), using_qnn:true] (%12011:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1208, solved=1)]) -> (%12012:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1208, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.29.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1208, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%11959:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12012:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1208, solved=1)]) -> (%12013:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.29.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1210, solved=1)), using_qnn:true] (%12013:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12014:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1), ), using_qnn:true] (%12014:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1)]) -> (%12015:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.29.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1212, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1211, solved=1)), using_qnn:true] (%12015:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1)]) -> (%12016:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1212, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1212, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1212, solved=1), ), using_qnn:true] (%12016:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1212, solved=1)]) -> (%12017:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1212, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.29.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1213, solved=1)), using_qnn:true] (%12015:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1209, solved=1)]) -> (%12018:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), ), using_qnn:true] (%12018:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)]) -> (%12019:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.29.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1215, solved=1), ), using_qnn:true] (%12019:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)]) -> (%12020:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1215, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.29.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1215, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), ), using_qnn:true] (%12019:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)], %12020:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1215, solved=1)]) -> (%12021:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.29.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1212, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), ), using_qnn:true] (%12021:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)], %12017:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1212, solved=1)]) -> (%12022:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), ), using_qnn:true] (%12022:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)]) -> (%12023:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.29.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1217, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1216, solved=1)), using_qnn:true] (%12023:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1214, solved=1)]) -> (%12024:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1217, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.29.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1217, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1217, solved=1), ), using_qnn:true] (%12024:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1217, solved=1)]) -> (%12025:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1217, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.29.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1217, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12013:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12025:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1217, solved=1)]) -> (%12026:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.30.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1219, solved=1)), using_qnn:true] (%12026:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12027:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1), ), using_qnn:true] (%12027:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1)]) -> (%12028:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.30.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1220, solved=1)), using_qnn:true] (%12028:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1)]) -> (%12029:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.30.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1222, solved=1)), using_qnn:true] (%12028:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1)]) -> (%12030:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.30.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1224, solved=1)), using_qnn:true] (%12028:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1218, solved=1)]) -> (%12031:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1), ), using_qnn:true] (%12029:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1)]) -> (%12032:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.30.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1), ), using_qnn:true] (%12032:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1)]) -> (%12033:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1), ), using_qnn:true] (%12030:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1)]) -> (%12034:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.30.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1), ), using_qnn:true] (%12034:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1)]) -> (%12035:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1), ), using_qnn:true] (%12031:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1)]) -> (%12036:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.30.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1), ), using_qnn:true] (%12036:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1)]) -> (%12037:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.30.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1227, solved=1)), using_qnn:true] (%12033:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1221, solved=1)]) -> (%12038:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.30.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1229, solved=1)), using_qnn:true] (%12035:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1223, solved=1)]) -> (%12039:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.30.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), ), using_qnn:true] (%12038:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)]) -> (%12040:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.30.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), ), using_qnn:true] (%12038:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)]) -> (%12041:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.30.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1), ), using_qnn:true] (%12041:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)]) -> (%12042:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.30.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1), ), using_qnn:true] (%12042:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1)], %12040:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)]) -> (%12043:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.30.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1), ), using_qnn:true] (%12043:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12044:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.30.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), ), using_qnn:true] (%12038:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12045:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.30.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), ), using_qnn:true] (%12045:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)], %12044:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1230, solved=1)]) -> (%12046:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.30.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), ), using_qnn:true] (%12039:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)]) -> (%12047:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.30.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), ), using_qnn:true] (%12039:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)]) -> (%12048:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.30.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1), ), using_qnn:true] (%12048:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)]) -> (%12049:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.30.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1), ), using_qnn:true] (%12049:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1)], %12047:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)]) -> (%12050:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.30.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1), ), using_qnn:true] (%12050:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12051:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.30.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), ), using_qnn:true] (%12039:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12052:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.30.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), ), using_qnn:true] (%12052:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)], %12051:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1231, solved=1)]) -> (%12053:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.30.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1232, solved=1), ), using_qnn:true] (%12053:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1228, solved=1)]) -> (%12054:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1232, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.30.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1232, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1), ), using_qnn:true] (%12054:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1232, solved=1)]) -> (%12055:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.30.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1), ), using_qnn:true] (%12055:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1)]) -> (%12057:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.30.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1234, solved=1), ), using_qnn:true] (%12037:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1225, solved=1)]) -> (%12058:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1234, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.30.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1234, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1), ), using_qnn:true] (%12058:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1234, solved=1)]) -> (%12059:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.30.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1), ), using_qnn:true] (%10001:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)], %12057:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1)]) -> (%12061:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.30.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1), ), using_qnn:true] (%10002:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)], %12059:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1)]) -> (%12062:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.30.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1), ), using_qnn:true] (%12061:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)]) -> (%12063:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.30.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1), ), using_qnn:true] (%12062:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)]) -> (%12064:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.30.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1), ), using_qnn:true] (%12046:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1226, solved=1)], %12063:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=33, solved=1)]) -> (%12065:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.30.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1237, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1), ), using_qnn:true] (%12065:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1)], %12066:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1237, solved=1)]) -> (%12067:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.30.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1238, solved=1), ), using_qnn:true] (%12067:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1)]) -> (%12068:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1238, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.30.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1238, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1239, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1238, solved=1), ), using_qnn:true] (%12068:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1238, solved=1)], %12069:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1239, solved=1)]) -> (%12070:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1238, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.30.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1240, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1241, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %12071:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1240, solved=1), constant:[0]]) -> (%12072:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1241, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.30.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1241, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1238, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1242, solved=1), ), using_qnn:true] (%12072:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1241, solved=1)], %12067:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1236, solved=1)], %12070:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1238, solved=1)]) -> (%12073:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1242, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.30.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1242, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1243, solved=1), ), using_qnn:true] (%12073:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1242, solved=1)]) -> (%12074:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1243, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.30.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1243, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1), ), using_qnn:true] (%12074:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1243, solved=1)], %12064:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=69, solved=1)]) -> (%12075:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.30.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1), ), using_qnn:true] (%12075:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1)]) -> (%12076:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1), ), using_qnn:true] (%12076:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1)]) -> (%12077:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.30.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1246, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1245, solved=1)), using_qnn:true] (%12077:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1244, solved=1)]) -> (%12078:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1246, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1246, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1246, solved=1), ), using_qnn:true] (%12078:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1246, solved=1)]) -> (%12079:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1246, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.30.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1246, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12026:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12079:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1246, solved=1)]) -> (%12080:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.30.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1248, solved=1)), using_qnn:true] (%12080:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12081:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1), ), using_qnn:true] (%12081:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1)]) -> (%12082:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.30.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1250, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1249, solved=1)), using_qnn:true] (%12082:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1)]) -> (%12083:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1250, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1250, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1250, solved=1), ), using_qnn:true] (%12083:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1250, solved=1)]) -> (%12084:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1250, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.30.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1251, solved=1)), using_qnn:true] (%12082:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1247, solved=1)]) -> (%12085:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), ), using_qnn:true] (%12085:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)]) -> (%12086:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.30.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1253, solved=1), ), using_qnn:true] (%12086:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)]) -> (%12087:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1253, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.30.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1253, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), ), using_qnn:true] (%12086:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)], %12087:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1253, solved=1)]) -> (%12088:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.30.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1250, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), ), using_qnn:true] (%12088:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)], %12084:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1250, solved=1)]) -> (%12089:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), ), using_qnn:true] (%12089:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)]) -> (%12090:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.30.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1255, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1254, solved=1)), using_qnn:true] (%12090:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1252, solved=1)]) -> (%12091:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1255, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.30.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1255, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1255, solved=1), ), using_qnn:true] (%12091:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1255, solved=1)]) -> (%12092:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1255, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.30.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1255, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12080:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12092:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1255, solved=1)]) -> (%12093:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.31.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1257, solved=1)), using_qnn:true] (%12093:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12094:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1), ), using_qnn:true] (%12094:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1)]) -> (%12095:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.31.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1258, solved=1)), using_qnn:true] (%12095:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1)]) -> (%12096:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.31.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1260, solved=1)), using_qnn:true] (%12095:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1)]) -> (%12097:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.31.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1262, solved=1)), using_qnn:true] (%12095:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1256, solved=1)]) -> (%12098:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1), ), using_qnn:true] (%12096:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1)]) -> (%12099:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.31.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1), ), using_qnn:true] (%12099:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1)]) -> (%12100:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1), ), using_qnn:true] (%12097:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1)]) -> (%12101:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.31.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1), ), using_qnn:true] (%12101:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1)]) -> (%12102:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1), ), using_qnn:true] (%12098:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1)]) -> (%12103:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.31.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1), ), using_qnn:true] (%12103:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1)]) -> (%12104:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.31.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1265, solved=1)), using_qnn:true] (%12100:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1259, solved=1)]) -> (%12105:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.31.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1267, solved=1)), using_qnn:true] (%12102:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1261, solved=1)]) -> (%12106:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.31.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), ), using_qnn:true] (%12105:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)]) -> (%12107:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.31.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), ), using_qnn:true] (%12105:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)]) -> (%12108:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.31.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1), ), using_qnn:true] (%12108:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)]) -> (%12109:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.31.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1), ), using_qnn:true] (%12109:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1)], %12107:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)]) -> (%12110:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.31.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1), ), using_qnn:true] (%12110:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12111:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.31.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), ), using_qnn:true] (%12105:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12112:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.31.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), ), using_qnn:true] (%12112:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)], %12111:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1268, solved=1)]) -> (%12113:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.31.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), ), using_qnn:true] (%12106:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)]) -> (%12114:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.31.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), ), using_qnn:true] (%12106:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)]) -> (%12115:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.31.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1), ), using_qnn:true] (%12115:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)]) -> (%12116:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.31.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1), ), using_qnn:true] (%12116:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1)], %12114:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)]) -> (%12117:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.31.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1), ), using_qnn:true] (%12117:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12118:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.31.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), ), using_qnn:true] (%12106:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12119:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.31.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), ), using_qnn:true] (%12119:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)], %12118:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1269, solved=1)]) -> (%12120:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.31.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1270, solved=1), ), using_qnn:true] (%12120:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1266, solved=1)]) -> (%12121:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1270, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.31.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1270, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1), ), using_qnn:true] (%12121:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1270, solved=1)]) -> (%12122:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.31.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1), ), using_qnn:true] (%12122:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)]) -> (%12124:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.31.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1272, solved=1), ), using_qnn:true] (%12104:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1263, solved=1)]) -> (%12125:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1272, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.31.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1272, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1), ), using_qnn:true] (%12125:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1272, solved=1)]) -> (%12126:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.31.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1), ), using_qnn:true] (%10003:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)], %12124:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)]) -> (%12128:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.31.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1), ), using_qnn:true] (%10004:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)], %12126:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1)]) -> (%12129:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.31.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1), ), using_qnn:true] (%12128:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)]) -> (%12130:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.31.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1), ), using_qnn:true] (%12129:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)]) -> (%12131:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.31.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1), ), using_qnn:true] (%12113:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1264, solved=1)], %12130:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=34, solved=1)]) -> (%12132:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.31.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1275, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1), ), using_qnn:true] (%12132:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1)], %12133:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1275, solved=1)]) -> (%12134:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.31.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1276, solved=1), ), using_qnn:true] (%12134:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1)]) -> (%12135:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1276, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.31.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1276, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1277, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1276, solved=1), ), using_qnn:true] (%12135:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1276, solved=1)], %12136:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1277, solved=1)]) -> (%12137:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1276, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.31.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1278, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1279, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %12138:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1278, solved=1), constant:[0]]) -> (%12139:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1279, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.31.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1279, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1276, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1280, solved=1), ), using_qnn:true] (%12139:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1279, solved=1)], %12134:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1274, solved=1)], %12137:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1276, solved=1)]) -> (%12140:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1280, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.31.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1280, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1281, solved=1), ), using_qnn:true] (%12140:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1280, solved=1)]) -> (%12141:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1281, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.31.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1281, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1), ), using_qnn:true] (%12141:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1281, solved=1)], %12131:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=70, solved=1)]) -> (%12142:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.31.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1), ), using_qnn:true] (%12142:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1)]) -> (%12143:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1), ), using_qnn:true] (%12143:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1)]) -> (%12144:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.31.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1284, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1283, solved=1)), using_qnn:true] (%12144:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1282, solved=1)]) -> (%12145:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1284, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1284, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1284, solved=1), ), using_qnn:true] (%12145:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1284, solved=1)]) -> (%12146:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1284, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.31.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1284, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12093:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12146:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1284, solved=1)]) -> (%12147:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.31.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1286, solved=1)), using_qnn:true] (%12147:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12148:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1), ), using_qnn:true] (%12148:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1)]) -> (%12149:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.31.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1288, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1287, solved=1)), using_qnn:true] (%12149:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1)]) -> (%12150:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1288, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1288, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1288, solved=1), ), using_qnn:true] (%12150:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1288, solved=1)]) -> (%12151:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1288, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.31.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1289, solved=1)), using_qnn:true] (%12149:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1285, solved=1)]) -> (%12152:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), ), using_qnn:true] (%12152:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)]) -> (%12153:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.31.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1291, solved=1), ), using_qnn:true] (%12153:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)]) -> (%12154:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1291, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.31.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1291, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), ), using_qnn:true] (%12153:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)], %12154:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1291, solved=1)]) -> (%12155:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.31.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1288, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), ), using_qnn:true] (%12155:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)], %12151:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1288, solved=1)]) -> (%12156:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), ), using_qnn:true] (%12156:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)]) -> (%12157:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.31.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1293, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1292, solved=1)), using_qnn:true] (%12157:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1290, solved=1)]) -> (%12158:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1293, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.31.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1293, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1293, solved=1), ), using_qnn:true] (%12158:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1293, solved=1)]) -> (%12159:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1293, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.31.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1293, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12147:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12159:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1293, solved=1)]) -> (%12160:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.32.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1295, solved=1)), using_qnn:true] (%12160:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12161:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1), ), using_qnn:true] (%12161:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1)]) -> (%12162:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.32.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1296, solved=1)), using_qnn:true] (%12162:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1)]) -> (%12163:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.32.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1298, solved=1)), using_qnn:true] (%12162:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1)]) -> (%12164:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.32.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1300, solved=1)), using_qnn:true] (%12162:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1294, solved=1)]) -> (%12165:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1), ), using_qnn:true] (%12163:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1)]) -> (%12166:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.32.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1), ), using_qnn:true] (%12166:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1)]) -> (%12167:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1), ), using_qnn:true] (%12164:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1)]) -> (%12168:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.32.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1), ), using_qnn:true] (%12168:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1)]) -> (%12169:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1), ), using_qnn:true] (%12165:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1)]) -> (%12170:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.32.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1), ), using_qnn:true] (%12170:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1)]) -> (%12171:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.32.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1303, solved=1)), using_qnn:true] (%12167:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1297, solved=1)]) -> (%12172:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.32.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1305, solved=1)), using_qnn:true] (%12169:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1299, solved=1)]) -> (%12173:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.32.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), ), using_qnn:true] (%12172:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)]) -> (%12174:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.32.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), ), using_qnn:true] (%12172:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)]) -> (%12175:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.32.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1), ), using_qnn:true] (%12175:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)]) -> (%12176:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.32.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1), ), using_qnn:true] (%12176:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1)], %12174:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)]) -> (%12177:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.32.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1), ), using_qnn:true] (%12177:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12178:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.32.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), ), using_qnn:true] (%12172:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12179:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.32.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), ), using_qnn:true] (%12179:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)], %12178:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1306, solved=1)]) -> (%12180:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.32.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), ), using_qnn:true] (%12173:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)]) -> (%12181:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.32.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), ), using_qnn:true] (%12173:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)]) -> (%12182:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.32.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1), ), using_qnn:true] (%12182:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)]) -> (%12183:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.32.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1), ), using_qnn:true] (%12183:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1)], %12181:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)]) -> (%12184:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.32.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1), ), using_qnn:true] (%12184:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12185:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.32.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), ), using_qnn:true] (%12173:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12186:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.32.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), ), using_qnn:true] (%12186:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)], %12185:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1307, solved=1)]) -> (%12187:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.32.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1308, solved=1), ), using_qnn:true] (%12187:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1304, solved=1)]) -> (%12188:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1308, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.32.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1308, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1), ), using_qnn:true] (%12188:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1308, solved=1)]) -> (%12189:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.32.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1), ), using_qnn:true] (%12189:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)]) -> (%12191:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.32.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1310, solved=1), ), using_qnn:true] (%12171:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1301, solved=1)]) -> (%12192:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1310, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.32.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1310, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1311, solved=1), ), using_qnn:true] (%12192:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1310, solved=1)]) -> (%12193:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1311, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.32.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1), ), using_qnn:true] (%10005:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)], %12191:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)]) -> (%12195:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.32.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1311, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1), ), using_qnn:true] (%10006:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)], %12193:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1311, solved=1)]) -> (%12196:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.32.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1), ), using_qnn:true] (%12195:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)]) -> (%12197:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.32.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1), ), using_qnn:true] (%12196:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)]) -> (%12198:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.32.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1), ), using_qnn:true] (%12180:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1302, solved=1)], %12197:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=35, solved=1)]) -> (%12199:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.32.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1313, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1), ), using_qnn:true] (%12199:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1)], %12200:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1313, solved=1)]) -> (%12201:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.32.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1314, solved=1), ), using_qnn:true] (%12201:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1)]) -> (%12202:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1314, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.32.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1314, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1315, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1314, solved=1), ), using_qnn:true] (%12202:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1314, solved=1)], %12203:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1315, solved=1)]) -> (%12204:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1314, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.32.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1316, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1317, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %12205:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1316, solved=1), constant:[0]]) -> (%12206:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1317, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.32.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1317, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1314, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1318, solved=1), ), using_qnn:true] (%12206:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1317, solved=1)], %12201:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1312, solved=1)], %12204:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1314, solved=1)]) -> (%12207:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1318, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.32.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1318, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1319, solved=1), ), using_qnn:true] (%12207:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1318, solved=1)]) -> (%12208:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1319, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.32.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1319, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1), ), using_qnn:true] (%12208:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1319, solved=1)], %12198:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=71, solved=1)]) -> (%12209:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.32.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1), ), using_qnn:true] (%12209:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1)]) -> (%12210:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1), ), using_qnn:true] (%12210:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1)]) -> (%12211:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.32.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1322, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1321, solved=1)), using_qnn:true] (%12211:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1320, solved=1)]) -> (%12212:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1322, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1322, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1322, solved=1), ), using_qnn:true] (%12212:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1322, solved=1)]) -> (%12213:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1322, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.32.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1322, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12160:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12213:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1322, solved=1)]) -> (%12214:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.32.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1324, solved=1)), using_qnn:true] (%12214:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12215:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1), ), using_qnn:true] (%12215:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1)]) -> (%12216:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.32.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1326, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1325, solved=1)), using_qnn:true] (%12216:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1)]) -> (%12217:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1326, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1326, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1326, solved=1), ), using_qnn:true] (%12217:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1326, solved=1)]) -> (%12218:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1326, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.32.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1327, solved=1)), using_qnn:true] (%12216:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1323, solved=1)]) -> (%12219:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), ), using_qnn:true] (%12219:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)]) -> (%12220:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.32.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1329, solved=1), ), using_qnn:true] (%12220:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)]) -> (%12221:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1329, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.32.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1329, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), ), using_qnn:true] (%12220:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)], %12221:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1329, solved=1)]) -> (%12222:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.32.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1326, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), ), using_qnn:true] (%12222:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)], %12218:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1326, solved=1)]) -> (%12223:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), ), using_qnn:true] (%12223:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)]) -> (%12224:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.32.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1331, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1330, solved=1)), using_qnn:true] (%12224:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1328, solved=1)]) -> (%12225:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1331, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.32.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1331, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1331, solved=1), ), using_qnn:true] (%12225:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1331, solved=1)]) -> (%12226:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1331, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.32.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1331, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12214:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12226:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1331, solved=1)]) -> (%12227:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.33.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1333, solved=1)), using_qnn:true] (%12227:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12228:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1), ), using_qnn:true] (%12228:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1)]) -> (%12229:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.33.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1334, solved=1)), using_qnn:true] (%12229:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1)]) -> (%12230:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.33.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1336, solved=1)), using_qnn:true] (%12229:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1)]) -> (%12231:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.33.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1338, solved=1)), using_qnn:true] (%12229:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1332, solved=1)]) -> (%12232:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1), ), using_qnn:true] (%12230:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1)]) -> (%12233:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.33.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1), ), using_qnn:true] (%12233:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1)]) -> (%12234:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1), ), using_qnn:true] (%12231:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1)]) -> (%12235:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.33.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1), ), using_qnn:true] (%12235:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1)]) -> (%12236:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1), ), using_qnn:true] (%12232:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1)]) -> (%12237:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.33.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1), ), using_qnn:true] (%12237:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1)]) -> (%12238:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.33.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1341, solved=1)), using_qnn:true] (%12234:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1335, solved=1)]) -> (%12239:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.33.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1343, solved=1)), using_qnn:true] (%12236:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1337, solved=1)]) -> (%12240:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.33.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), ), using_qnn:true] (%12239:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)]) -> (%12241:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.33.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), ), using_qnn:true] (%12239:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)]) -> (%12242:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.33.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1), ), using_qnn:true] (%12242:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)]) -> (%12243:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.33.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1), ), using_qnn:true] (%12243:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1)], %12241:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)]) -> (%12244:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.33.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1), ), using_qnn:true] (%12244:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12245:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.33.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), ), using_qnn:true] (%12239:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12246:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.33.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), ), using_qnn:true] (%12246:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)], %12245:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1344, solved=1)]) -> (%12247:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.33.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), ), using_qnn:true] (%12240:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)]) -> (%12248:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.33.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), ), using_qnn:true] (%12240:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)]) -> (%12249:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.33.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1), ), using_qnn:true] (%12249:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)]) -> (%12250:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.33.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1), ), using_qnn:true] (%12250:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1)], %12248:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)]) -> (%12251:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.33.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1), ), using_qnn:true] (%12251:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12252:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.33.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), ), using_qnn:true] (%12240:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12253:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.33.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), ), using_qnn:true] (%12253:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)], %12252:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1345, solved=1)]) -> (%12254:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.33.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1346, solved=1), ), using_qnn:true] (%12254:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1342, solved=1)]) -> (%12255:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1346, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.33.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1346, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1), ), using_qnn:true] (%12255:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1346, solved=1)]) -> (%12256:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.33.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1), ), using_qnn:true] (%12256:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)]) -> (%12258:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.33.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1348, solved=1), ), using_qnn:true] (%12238:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1339, solved=1)]) -> (%12259:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1348, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.33.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1348, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1), ), using_qnn:true] (%12259:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1348, solved=1)]) -> (%12260:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.33.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1), ), using_qnn:true] (%10007:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)], %12258:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)]) -> (%12262:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.33.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1), ), using_qnn:true] (%10008:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)], %12260:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1)]) -> (%12263:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.33.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1), ), using_qnn:true] (%12262:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)]) -> (%12264:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.33.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1), ), using_qnn:true] (%12263:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)]) -> (%12265:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.33.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1), ), using_qnn:true] (%12247:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1340, solved=1)], %12264:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=36, solved=1)]) -> (%12266:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.33.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1351, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1), ), using_qnn:true] (%12266:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1)], %12267:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1351, solved=1)]) -> (%12268:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.33.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1352, solved=1), ), using_qnn:true] (%12268:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1)]) -> (%12269:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1352, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.33.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1352, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1353, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1352, solved=1), ), using_qnn:true] (%12269:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1352, solved=1)], %12270:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1353, solved=1)]) -> (%12271:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1352, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.33.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1354, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1355, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %12272:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1354, solved=1), constant:[0]]) -> (%12273:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1355, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.33.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1355, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1352, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1356, solved=1), ), using_qnn:true] (%12273:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1355, solved=1)], %12268:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1350, solved=1)], %12271:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1352, solved=1)]) -> (%12274:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1356, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.33.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1356, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1357, solved=1), ), using_qnn:true] (%12274:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1356, solved=1)]) -> (%12275:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1357, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.33.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1357, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1), ), using_qnn:true] (%12275:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1357, solved=1)], %12265:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=72, solved=1)]) -> (%12276:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.33.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1), ), using_qnn:true] (%12276:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1)]) -> (%12277:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1), ), using_qnn:true] (%12277:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1)]) -> (%12278:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.33.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1360, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1359, solved=1)), using_qnn:true] (%12278:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1358, solved=1)]) -> (%12279:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1360, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1360, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1360, solved=1), ), using_qnn:true] (%12279:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1360, solved=1)]) -> (%12280:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1360, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.33.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1360, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12227:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12280:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1360, solved=1)]) -> (%12281:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.33.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1362, solved=1)), using_qnn:true] (%12281:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12282:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1), ), using_qnn:true] (%12282:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1)]) -> (%12283:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.33.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1364, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1363, solved=1)), using_qnn:true] (%12283:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1)]) -> (%12284:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1364, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1364, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1364, solved=1), ), using_qnn:true] (%12284:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1364, solved=1)]) -> (%12285:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1364, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.33.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1365, solved=1)), using_qnn:true] (%12283:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1361, solved=1)]) -> (%12286:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), ), using_qnn:true] (%12286:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)]) -> (%12287:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.33.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1367, solved=1), ), using_qnn:true] (%12287:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)]) -> (%12288:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1367, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.33.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1367, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), ), using_qnn:true] (%12287:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)], %12288:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1367, solved=1)]) -> (%12289:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.33.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1364, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), ), using_qnn:true] (%12289:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)], %12285:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1364, solved=1)]) -> (%12290:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), ), using_qnn:true] (%12290:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)]) -> (%12291:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.33.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1369, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1368, solved=1)), using_qnn:true] (%12291:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1366, solved=1)]) -> (%12292:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1369, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.33.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1369, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1369, solved=1), ), using_qnn:true] (%12292:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1369, solved=1)]) -> (%12293:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1369, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.33.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1369, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12281:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12293:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1369, solved=1)]) -> (%12294:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.34.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1371, solved=1)), using_qnn:true] (%12294:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12295:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1), ), using_qnn:true] (%12295:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1)]) -> (%12296:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.34.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1372, solved=1)), using_qnn:true] (%12296:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1)]) -> (%12297:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.34.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1374, solved=1)), using_qnn:true] (%12296:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1)]) -> (%12298:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.34.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1376, solved=1)), using_qnn:true] (%12296:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1370, solved=1)]) -> (%12299:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1), ), using_qnn:true] (%12297:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1)]) -> (%12300:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.34.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1), ), using_qnn:true] (%12300:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1)]) -> (%12301:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1), ), using_qnn:true] (%12298:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1)]) -> (%12302:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.34.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1), ), using_qnn:true] (%12302:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1)]) -> (%12303:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1), ), using_qnn:true] (%12299:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1)]) -> (%12304:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.34.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1), ), using_qnn:true] (%12304:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1)]) -> (%12305:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.34.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1379, solved=1)), using_qnn:true] (%12301:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1373, solved=1)]) -> (%12306:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.34.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1381, solved=1)), using_qnn:true] (%12303:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1375, solved=1)]) -> (%12307:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.34.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), ), using_qnn:true] (%12306:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)]) -> (%12308:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.34.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), ), using_qnn:true] (%12306:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)]) -> (%12309:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.34.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1), ), using_qnn:true] (%12309:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)]) -> (%12310:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.34.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1), ), using_qnn:true] (%12310:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1)], %12308:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)]) -> (%12311:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.34.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1), ), using_qnn:true] (%12311:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12312:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.34.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), ), using_qnn:true] (%12306:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12313:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.34.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), ), using_qnn:true] (%12313:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)], %12312:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1382, solved=1)]) -> (%12314:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.34.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), ), using_qnn:true] (%12307:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)]) -> (%12315:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.34.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), ), using_qnn:true] (%12307:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)]) -> (%12316:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.34.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1), ), using_qnn:true] (%12316:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)]) -> (%12317:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.34.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1), ), using_qnn:true] (%12317:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1)], %12315:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)]) -> (%12318:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.34.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1), ), using_qnn:true] (%12318:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12319:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.34.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), ), using_qnn:true] (%12307:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12320:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.34.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), ), using_qnn:true] (%12320:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)], %12319:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1383, solved=1)]) -> (%12321:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.34.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1384, solved=1), ), using_qnn:true] (%12321:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1380, solved=1)]) -> (%12322:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1384, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.34.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1384, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1), ), using_qnn:true] (%12322:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1384, solved=1)]) -> (%12323:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.34.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1), ), using_qnn:true] (%12323:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)]) -> (%12325:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.34.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1386, solved=1), ), using_qnn:true] (%12305:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1377, solved=1)]) -> (%12326:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1386, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.34.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1386, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1), ), using_qnn:true] (%12326:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1386, solved=1)]) -> (%12327:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.34.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1), ), using_qnn:true] (%10009:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)], %12325:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)]) -> (%12329:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.34.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1), ), using_qnn:true] (%10010:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)], %12327:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1)]) -> (%12330:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.34.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1), ), using_qnn:true] (%12329:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)]) -> (%12331:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.34.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1), ), using_qnn:true] (%12330:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)]) -> (%12332:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.34.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1), ), using_qnn:true] (%12314:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1378, solved=1)], %12331:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=37, solved=1)]) -> (%12333:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.34.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1389, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1), ), using_qnn:true] (%12333:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1)], %12334:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1389, solved=1)]) -> (%12335:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.34.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1390, solved=1), ), using_qnn:true] (%12335:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1)]) -> (%12336:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1390, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.34.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1390, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1391, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1390, solved=1), ), using_qnn:true] (%12336:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1390, solved=1)], %12337:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1391, solved=1)]) -> (%12338:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1390, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.34.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1392, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1393, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %12339:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1392, solved=1), constant:[0]]) -> (%12340:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1393, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.34.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1393, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1390, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1394, solved=1), ), using_qnn:true] (%12340:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1393, solved=1)], %12335:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1388, solved=1)], %12338:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1390, solved=1)]) -> (%12341:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1394, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.34.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1394, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1395, solved=1), ), using_qnn:true] (%12341:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1394, solved=1)]) -> (%12342:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1395, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.34.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1395, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1), ), using_qnn:true] (%12342:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1395, solved=1)], %12332:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=73, solved=1)]) -> (%12343:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.34.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1), ), using_qnn:true] (%12343:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1)]) -> (%12344:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1), ), using_qnn:true] (%12344:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1)]) -> (%12345:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.34.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1398, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1397, solved=1)), using_qnn:true] (%12345:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1396, solved=1)]) -> (%12346:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1398, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1398, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1398, solved=1), ), using_qnn:true] (%12346:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1398, solved=1)]) -> (%12347:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1398, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.34.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1398, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12294:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12347:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1398, solved=1)]) -> (%12348:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.34.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1400, solved=1)), using_qnn:true] (%12348:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12349:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1), ), using_qnn:true] (%12349:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1)]) -> (%12350:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.34.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1402, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1401, solved=1)), using_qnn:true] (%12350:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1)]) -> (%12351:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1402, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1402, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1402, solved=1), ), using_qnn:true] (%12351:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1402, solved=1)]) -> (%12352:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1402, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.34.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1403, solved=1)), using_qnn:true] (%12350:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1399, solved=1)]) -> (%12353:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), ), using_qnn:true] (%12353:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)]) -> (%12354:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.34.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1405, solved=1), ), using_qnn:true] (%12354:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)]) -> (%12355:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1405, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.34.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1405, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), ), using_qnn:true] (%12354:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)], %12355:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1405, solved=1)]) -> (%12356:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.34.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1402, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), ), using_qnn:true] (%12356:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)], %12352:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1402, solved=1)]) -> (%12357:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), ), using_qnn:true] (%12357:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)]) -> (%12358:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.34.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1407, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1406, solved=1)), using_qnn:true] (%12358:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1404, solved=1)]) -> (%12359:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1407, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.34.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1407, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1407, solved=1), ), using_qnn:true] (%12359:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1407, solved=1)]) -> (%12360:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1407, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.34.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1407, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12348:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12360:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1407, solved=1)]) -> (%12361:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.35.input_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1409, solved=1)), using_qnn:true] (%12361:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12362:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.self_attn.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1), ), using_qnn:true] (%12362:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1)]) -> (%12363:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.35.self_attn.q_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1410, solved=1)), using_qnn:true] (%12363:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1)]) -> (%12364:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.35.self_attn.k_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1412, solved=1)), using_qnn:true] (%12363:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1)]) -> (%12365:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.35.self_attn.v_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1414, solved=1)), using_qnn:true] (%12363:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1408, solved=1)]) -> (%12366:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.self_attn.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1), ), using_qnn:true] (%12364:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1)]) -> (%12367:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.35.self_attn.Transpose.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1), ), using_qnn:true] (%12367:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1)]) -> (%12368:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.self_attn.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1), ), using_qnn:true] (%12365:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1)]) -> (%12369:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.35.self_attn.Transpose.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1), ), using_qnn:true] (%12369:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1)]) -> (%12370:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.self_attn.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1), ), using_qnn:true] (%12366:tensor<[1, 1, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1)]) -> (%12371:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.35.self_attn.Transpose.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1), ), using_qnn:true] (%12371:tensor<[1, 32, 8, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1)]) -> (%12372:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.35.self_attn.q_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1417, solved=1)), using_qnn:true] (%12368:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1411, solved=1)]) -> (%12373:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.35.self_attn.k_norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1419, solved=1)), using_qnn:true] (%12370:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1413, solved=1)]) -> (%12374:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.self_attn.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), ), using_qnn:true] (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.self_attn.View.5"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), ), using_qnn:true] (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.35.self_attn.Slice.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), ), using_qnn:true] (%12373:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)]) -> (%12375:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.35.self_attn.Slice.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), ), using_qnn:true] (%12373:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)]) -> (%12376:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.35.self_attn.Neg.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1), ), using_qnn:true] (%12376:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)]) -> (%12377:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.35.self_attn.Concat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1), ), using_qnn:true] (%12377:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1)], %12375:tensor<[1, 32, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)]) -> (%12378:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.35.self_attn.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1), ), using_qnn:true] (%12378:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12379:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.35.self_attn.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), ), using_qnn:true] (%12373:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12380:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.35.self_attn.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), ), using_qnn:true] (%12380:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)], %12379:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1420, solved=1)]) -> (%12381:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.35.self_attn.Slice.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), ), using_qnn:true] (%12374:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)]) -> (%12382:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)])
-            linalg.CPU.SliceOp <name="model.layers.35.self_attn.Slice.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), ), using_qnn:true] (%12374:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)]) -> (%12383:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)])
-            linalg.CPU.NegOp <name="model.layers.35.self_attn.Neg.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1), ), using_qnn:true] (%12383:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)]) -> (%12384:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.35.self_attn.Concat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1), ), using_qnn:true] (%12384:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1)], %12382:tensor<[1, 8, 32, 64], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)]) -> (%12385:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.35.self_attn.Mul.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1), ), using_qnn:true] (%12385:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1)], %10015:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=76, solved=1)]) -> (%12386:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.35.self_attn.Mul.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), ), using_qnn:true] (%12374:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)], %10016:tensor<[1, 1, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=77, solved=1)]) -> (%12387:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.35.self_attn.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), ), using_qnn:true] (%12387:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)], %12386:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1421, solved=1)]) -> (%12388:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.35.self_attn.CastType.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1422, solved=1), ), using_qnn:true] (%12388:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1418, solved=1)]) -> (%12389:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1422, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.35.self_attn.CastType.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1422, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1), ), using_qnn:true] (%12389:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1422, solved=1)]) -> (%12390:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.35.self_attn.Transpose.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1), ), using_qnn:true] (%12390:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)]) -> (%12392:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.35.self_attn.CastType.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1), outputs_0:QuantSpec(Raw(type: Float32), uuid=1424, solved=1), ), using_qnn:true] (%12372:tensor<[1, 8, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1415, solved=1)]) -> (%12393:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1424, solved=1)])
-            linalg.CPU.CastTypeOp <name="model.layers.35.self_attn.CastType.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: Float32), uuid=1424, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1), ), using_qnn:true] (%12393:tensor<[1, 8, 32, 128], Float32, CPU>[quant_recipe:QuantSpec(Raw(type: Float32), uuid=1424, solved=1)]) -> (%12394:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.35.self_attn.Concat.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1), ), using_qnn:true] (%10011:tensor<[1, 8, 128, 992], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)], %12392:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)]) -> (%12396:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)])
-            linalg.CPU.ConcatOp <name="model.layers.35.self_attn.Concat.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1), ), using_qnn:true] (%10012:tensor<[1, 8, 992, 128], UInt8PerTensor, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)], %12394:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1)]) -> (%12397:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.35.self_attn.Repeat.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1), ), using_qnn:true] (%12396:tensor<[1, 8, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)]) -> (%12398:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)])
-            linalg.CPU.RepeatOp <name="model.layers.35.self_attn.Repeat.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1), outputs_0:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1), ), using_qnn:true] (%12397:tensor<[1, 8, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)]) -> (%12399:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.35.self_attn.MatMul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1), ), using_qnn:true] (%12381:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1416, solved=1)], %12398:tensor<[1, 32, 128, 1024], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=38, solved=1)]) -> (%12400:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.35.self_attn.Mul.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1427, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1), ), using_qnn:true] (%12400:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1)], %12401:tensor<[1], UInt16, CPU>[constant:[65535], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1427, solved=1)]) -> (%12402:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1)])
-            linalg.CPU.ReduceMinOp <name="model.layers.35.self_attn.ReduceMin.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1428, solved=1), ), using_qnn:true] (%12402:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1)]) -> (%12403:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1428, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.35.self_attn.Add.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1428, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1429, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1428, solved=1), ), using_qnn:true] (%12403:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1428, solved=1)], %12404:tensor<[1], UInt16, CPU>[constant:[0], quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1429, solved=1)]) -> (%12405:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1428, solved=1)])
-            linalg.CPU.EqualOp <name="model.layers.35.self_attn.Equal.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt16), uuid=2, solved=1), inputs_1:QuantSpec(Raw(type: UInt16), uuid=1430, solved=1), outputs_0:QuantSpec(Raw(type: UInt8), uuid=1431, solved=1), ), using_qnn:true] (%9940:tensor<[1, 1, 32, 1024], UInt16, CPU>[qnn_graph_inputs:true, quant_recipe:QuantSpec(Raw(type: UInt16), uuid=2, solved=1)], %12406:tensor<[1], UInt16, CPU>[quant_recipe:QuantSpec(Raw(type: UInt16), uuid=1430, solved=1), constant:[0]]) -> (%12407:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1431, solved=1)])
-            linalg.CPU.WhereOp <name="model.layers.35.self_attn.Where.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(Raw(type: UInt8), uuid=1431, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1), inputs_2:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1428, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1432, solved=1), ), using_qnn:true] (%12407:tensor<[1, 1, 32, 1024], UInt8, CPU>[quant_recipe:QuantSpec(Raw(type: UInt8), uuid=1431, solved=1)], %12402:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1426, solved=1)], %12405:tensor<[1, 32, 32, 1], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1428, solved=1)]) -> (%12408:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1432, solved=1)])
-            linalg.CPU.SoftmaxOp <name="model.layers.35.self_attn.Softmax.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1432, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1433, solved=1), ), using_qnn:true] (%12408:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1432, solved=1)]) -> (%12409:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1433, solved=1)])
-            linalg.CPU.MatMulOp <name="model.layers.35.self_attn.MatMul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1433, solved=1), inputs_1:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1), ), using_qnn:true] (%12409:tensor<[1, 32, 32, 1024], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1433, solved=1)], %12399:tensor<[1, 32, 1024, 128], UInt8PerTensor, CPU>[quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=74, solved=1)]) -> (%12410:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1)])
-            linalg.CPU.TransposeOp <name="model.layers.35.self_attn.Transpose.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1), ), using_qnn:true] (%12410:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1)]) -> (%12411:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.self_attn.View.6"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1), ), using_qnn:true] (%12411:tensor<[1, 32, 32, 128], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1)]) -> (%12412:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.35.self_attn.o_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1436, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1435, solved=1)), using_qnn:true] (%12412:tensor<[1, 1, 32, 4096], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1434, solved=1)]) -> (%12413:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1436, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.self_attn.View.7"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1436, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1436, solved=1), ), using_qnn:true] (%12413:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1436, solved=1)]) -> (%12414:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1436, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.35.Add.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1436, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12361:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12414:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1436, solved=1)]) -> (%12415:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.layers.35.post_attention_layernorm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1438, solved=1)), using_qnn:true] (%12415:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12416:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.mlp.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1), ), using_qnn:true] (%12416:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1)]) -> (%12417:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.35.mlp.up_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1440, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1439, solved=1)), using_qnn:true] (%12417:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1)]) -> (%12418:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1440, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.mlp.View.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1440, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1440, solved=1), ), using_qnn:true] (%12418:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1440, solved=1)]) -> (%12419:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1440, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.35.mlp.gate_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1441, solved=1)), using_qnn:true] (%12417:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1437, solved=1)]) -> (%12420:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.mlp.View.2"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), ), using_qnn:true] (%12420:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)]) -> (%12421:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)])
-            linalg.CPU.SigmoidOp <name="model.layers.35.mlp.Sigmoid.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1443, solved=1), ), using_qnn:true] (%12421:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)]) -> (%12422:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1443, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.35.mlp.Mul.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1443, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), ), using_qnn:true] (%12421:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)], %12422:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1443, solved=1)]) -> (%12423:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)])
-            linalg.CPU.MulOp <name="model.layers.35.mlp.Mul.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1440, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), ), using_qnn:true] (%12423:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)], %12419:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1440, solved=1)]) -> (%12424:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.mlp.View.3"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), ), using_qnn:true] (%12424:tensor<[1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)]) -> (%12425:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)])
-            linalg.CPU.Conv2DOp <name="model.layers.35.mlp.down_proj"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1445, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1444, solved=1)), using_qnn:true] (%12425:tensor<[1, 1, 32, 9728], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1442, solved=1)]) -> (%12426:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1445, solved=1)])
-            linalg.CPU.ViewOp <name="model.layers.35.mlp.View.4"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1445, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1445, solved=1), ), using_qnn:true] (%12426:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1445, solved=1)]) -> (%12427:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1445, solved=1)])
-            linalg.CPU.AddOp <name="model.layers.35.Add.1"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), inputs_1:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1445, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), ), using_qnn:true] (%12415:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)], %12427:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1445, solved=1)]) -> (%12428:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)])
-            linalg.CPU.RMSNormOp <name="model.norm"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1446, solved=1), weight_weight:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1447, solved=1)), using_qnn:true] (%12428:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=75, solved=1)]) -> (%12429:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1446, solved=1)])
-            linalg.CPU.ViewOp <name="model.View.0"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1446, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1446, solved=1), ), using_qnn:true] (%12429:tensor<[1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1446, solved=1)]) -> (%12430:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1446, solved=1)])
-            linalg.CPU.Conv2DOp <name="lm_head"> [qnn_graph_name:model.0.s32, qnn_context_name:context.0, quant_recipe:QuantAnnotation(inputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1446, solved=1), outputs_0:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1449, solved=1), weight_weight:QuantSpec(LPBQ(quant_min: -8, quant_max: 7, block_size: 32, ch_axis: 0, scale_level_0_bitwidth: 4, quant_to_type: UInt4, scale_1_type: Float32), uuid=1448, solved=1)), using_qnn:true] (%12430:tensor<[1, 1, 32, 2560], UInt16PerTensor, CPU>[quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1446, solved=1)]) -> (%12431:tensor<[1, 1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1449, solved=1)])
-            cf.ReturnOp (%12431:tensor<[1, 1, 32, 151936], UInt16PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(AsymPerTensor(quant_min: 0, quant_max: 65535, quant_to_type: UInt16, scale_type: Float32, zero_point_type: Int32), uuid=1449, solved=1)], %10047:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=93, solved=1)], %10114:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=131, solved=1)], %10181:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=169, solved=1)], %10248:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=207, solved=1)], %10315:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=245, solved=1)], %10382:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=283, solved=1)], %10449:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=321, solved=1)], %10516:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=359, solved=1)], %10583:tensor<[1, 8, 128, 32], UInt8PerTensor, 
CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=397, solved=1)], %10650:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=435, solved=1)], %10717:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=473, solved=1)], %10784:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=511, solved=1)], %10851:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=549, solved=1)], %10918:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=587, solved=1)], %10985:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=625, solved=1)], %11052:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=663, solved=1)], %11119:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=701, solved=1)], %11186:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, 
scale_type: Float32), uuid=739, solved=1)], %11253:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=777, solved=1)], %11320:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=815, solved=1)], %11387:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=853, solved=1)], %11454:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=891, solved=1)], %11521:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=929, solved=1)], %11588:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=967, solved=1)], %11655:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1005, solved=1)], %11722:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1043, solved=1)], %11789:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1081, solved=1)], %11856:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1119, solved=1)], %11923:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1157, solved=1)], %11990:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1195, solved=1)], %12057:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1233, solved=1)], %12124:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1271, solved=1)], %12191:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1309, solved=1)], %12258:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1347, solved=1)], %12325:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1385, solved=1)], %12392:tensor<[1, 8, 128, 32], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1423, solved=1)], %10049:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=95, 
solved=1)], %10116:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=133, solved=1)], %10183:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=171, solved=1)], %10250:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=209, solved=1)], %10317:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=247, solved=1)], %10384:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=285, solved=1)], %10451:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=323, solved=1)], %10518:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=361, solved=1)], %10585:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=399, solved=1)], %10652:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=437, solved=1)], %10719:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=475, solved=1)], %10786:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=513, solved=1)], %10853:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=551, solved=1)], %10920:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=589, solved=1)], %10987:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=627, solved=1)], %11054:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=665, solved=1)], %11121:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=703, solved=1)], %11188:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=741, solved=1)], %11255:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=779, solved=1)], %11322:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=817, 
solved=1)], %11389:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=855, solved=1)], %11456:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=893, solved=1)], %11523:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=931, solved=1)], %11590:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=969, solved=1)], %11657:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1007, solved=1)], %11724:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1045, solved=1)], %11791:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1083, solved=1)], %11858:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1121, solved=1)], %11925:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1159, solved=1)], %11992:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, 
quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1197, solved=1)], %12059:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1235, solved=1)], %12126:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1273, solved=1)], %12193:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1311, solved=1)], %12260:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1349, solved=1)], %12327:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1387, solved=1)], %12394:tensor<[1, 8, 32, 128], UInt8PerTensor, CPU>[qnn_graph_outputs:true, quant_recipe:QuantSpec(SymPerTensor(quant_min: 0, quant_max: 255, quant_to_type: UInt8, scale_type: Float32), uuid=1425, solved=1)]) -> ()
-        }
-    }
-}
- 

From 742b4bd12b6c8457656e32b3d16c04ca9ecc1bfd Mon Sep 17 00:00:00 2001
From: oreomaker <zh002919@outlook.com>
Date: Sun, 1 Feb 2026 17:29:24 +0800
Subject: [PATCH 2/7] fix(qnn): reorder QNN backend build file and update
 tensor wrapper logic

---
 mllm/CMakeLists.txt            | 9 +++++----
 mllm/backends/qnn/QNNUtils.cpp | 7 ++++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/mllm/CMakeLists.txt b/mllm/CMakeLists.txt
index 06fa5aab..ceafc54f 100644
--- a/mllm/CMakeLists.txt
+++ b/mllm/CMakeLists.txt
@@ -125,16 +125,17 @@ if(MLLM_BUILD_OPENCL_BACKEND)
   )
 endif()
 
-if(MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE OR MLLM_BUILD_QNN_BACKEND)
-  add_subdirectory(backends/qnn)
-endif()
-
+# Add this definition before add_subdirectory(backends/qnn): add_compile_definitions() only reaches sub-directories added after it
 if(MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE)
   add_compile_definitions(
     MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE
   )
 endif()
 
+if(MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE OR MLLM_BUILD_QNN_BACKEND)
+  add_subdirectory(backends/qnn)
+endif()
+
 if(MLLM_BUILD_QNN_BACKEND)
   add_compile_definitions(
     MLLM_QNN_BACKEND
diff --git a/mllm/backends/qnn/QNNUtils.cpp b/mllm/backends/qnn/QNNUtils.cpp
index 73d240bb..318300db 100644
--- a/mllm/backends/qnn/QNNUtils.cpp
+++ b/mllm/backends/qnn/QNNUtils.cpp
@@ -455,7 +455,9 @@ std::shared_ptr<QNNTensorWrapper> QNNTensorWrapper::create(const std::string& na
   // it will be allocated to QNN shared buffer via QNNTensorWrapper::alloc() later
   MLLM_RT_ASSERT(!name.empty());
   // in AOT case, the tensor is all on CPU (TODO: handle this)
-  // if (type != QNN_TENSOR_TYPE_STATIC) { MLLM_RT_ASSERT(tensor.device() == kQNN); }
+#ifndef MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE
+  if (type != QNN_TENSOR_TYPE_STATIC) { MLLM_RT_ASSERT(tensor.device() == kQNN); }
+#endif
 
   Qnn_DataType_t dataType = mllmDataTypeToQnnDataType(tensor.dtype());
 
@@ -466,6 +468,9 @@ std::shared_ptr<QNNTensorWrapper> QNNTensorWrapper::create(const std::string& na
 
   tensorWrapper->dataContainer_ = tensor;
 
+  // when passed allocated tensor, mark isAlloc_ = true
+  if (!tensor.isNil()) tensorWrapper->isAlloc_ = true;
+
   return tensorWrapper;
 }
 

From 57bad6b0d9e6523d0be91212d7e9dabada17ac9d Mon Sep 17 00:00:00 2001
From: oreomaker <zh002919@outlook.com>
Date: Sun, 1 Feb 2026 17:30:28 +0800
Subject: [PATCH 3/7] refactor(qnn): remove unused output order tracking and
 add debug utilities

- Remove qnnOutputNameToIndex_ map and related functionality that was
  tracking QNN output order since it's no longer needed
- Remove expectedOutputOrder_ and associated getter/setter methods
  from QNNModel class
- Remove the logic in QNNGraphBuildPass that was recording MLLM
  expected output order from ReturnOp
- Add new debug utility functions for printing dequantized UInt16
  tensor data with support for multi-dimensional arrays
- Fix integer casting issue in PromptProcessor prefill method
---
 mllm/backends/qnn/QNNModel.cpp                |  4 --
 mllm/backends/qnn/QNNModel.hpp                | 19 -----
 mllm/backends/qnn/QNNUtils.hpp                | 71 +++++++++++++++++++
 mllm/backends/qnn/aot_rt/PromptProcessor.cpp  |  2 +-
 .../backends/qnn/passes/QNNGraphBuildPass.cpp | 23 +-----
 5 files changed, 75 insertions(+), 44 deletions(-)

diff --git a/mllm/backends/qnn/QNNModel.cpp b/mllm/backends/qnn/QNNModel.cpp
index e99052d9..6fc6110b 100644
--- a/mllm/backends/qnn/QNNModel.cpp
+++ b/mllm/backends/qnn/QNNModel.cpp
@@ -134,8 +134,6 @@ ModelError_t QNNModel::loadGraphTensorInfo(const Qnn_Tensor_t* inputTensors, uin
 
     outputTensorWrappers_.push_back(wrapper);
     tensorWrapperMap_[tensorName] = wrapper;
-    // Record QNN output order (index in outputTensorWrappers_)
-    qnnOutputNameToIndex_[tensorName] = static_cast<int>(outputTensorWrappers_.size() - 1);
   }
 
   MLLM_INFO("QNNModel::loadGraphTensorInfo() loaded {} input tensors and {} output tensors for graph: {}", numInputTensors,
@@ -182,8 +180,6 @@ ModelError_t QNNModel::addTensorWrapper(const std::shared_ptr<QNNTensorWrapper>&
     inputTensorWrappers_.push_back(tensorWrapper);
   } else if (QNN_TENSOR_GET_TYPE(nativeTensor) == QNN_TENSOR_TYPE_APP_READ) {
     outputTensorWrappers_.push_back(tensorWrapper);
-    // Record QNN output order (index in outputTensorWrappers_)
-    qnnOutputNameToIndex_[tensorName] = static_cast<int>(outputTensorWrappers_.size() - 1);
   }
 
   return MODEL_NO_ERROR;
diff --git a/mllm/backends/qnn/QNNModel.hpp b/mllm/backends/qnn/QNNModel.hpp
index 7c0b3887..49504474 100644
--- a/mllm/backends/qnn/QNNModel.hpp
+++ b/mllm/backends/qnn/QNNModel.hpp
@@ -76,21 +76,6 @@ class QNNModel {
 
   std::map<std::string, std::vector<std::string>> getOutputTensorMap() { return modelOutputTensorMap_; }
 
-  // Set expected output order (MLLM order)
-  void setExpectedOutputOrder(const std::vector<std::string>& expectedOrder) { expectedOutputOrder_ = expectedOrder; }
-
-  // Get expected output order
-  [[nodiscard]] const std::vector<std::string>& getExpectedOutputOrder() const { return expectedOutputOrder_; }
-
-  // Get QNN output index by tensor name
-  [[nodiscard]] int getQnnOutputIndex(const std::string& tensorName) const {
-    auto it = qnnOutputNameToIndex_.find(tensorName);
-    if (it != qnnOutputNameToIndex_.end()) {
-      return it->second;
-    }
-    return -1;  // Not found
-  }
-
   // Load input/output tensor information from existing graph
   ModelError_t loadGraphTensorInfo(const Qnn_Tensor_t* inputTensors, uint32_t numInputTensors,
                                    const Qnn_Tensor_t* outputTensors, uint32_t numOutputTensors);
@@ -118,10 +103,6 @@ class QNNModel {
 
   std::map<std::string, std::vector<std::string>> modelOutputTensorMap_;
 
-  // Output order mapping: MLLM expected order and QNN actual order
-  std::vector<std::string> expectedOutputOrder_;  // MLLM expected output order (tensor names)
-  std::map<std::string, int> qnnOutputNameToIndex_;  // QNN output tensor name -> index in outputTensorWrappers_
-
   // Storage for node string parameters to ensure lifetime
   struct NodeStringStorage {
     std::string name;
diff --git a/mllm/backends/qnn/QNNUtils.hpp b/mllm/backends/qnn/QNNUtils.hpp
index 047a7935..36fb6a91 100644
--- a/mllm/backends/qnn/QNNUtils.hpp
+++ b/mllm/backends/qnn/QNNUtils.hpp
@@ -302,4 +302,75 @@ QNNParamScalarWrapper::QNNParamScalarWrapper(const std::string& name, T value) :
   }
 }
 
+// --------------- QNN Quantization Print Helper (DBG Use) ---------------
+// Recursive worker for printDequantizedTensor(): prints `tensor` from dimension `dim` downwards as nested
+// bracketed lists, dequantizing every uint16_t element as (raw + offset) * scale and formatting it with "%.4f".
+//   tensor  - tensor whose elements are read through constAt<uint16_t>().
+//   dim     - dimension being expanded; once dim reaches the rank a single scalar is printed.
+//   indices - coordinates already fixed for dimensions [0, dim); pushed/popped here, unchanged on return.
+//   scale, offset - quantization parameters applied to every element.
+inline void __printDequantizedUInt16TensorData(const mllm::Tensor& tensor, int dim, std::vector<int32_t>& indices, float scale,
+                                               int32_t offset) {
+  const auto& dims = tensor.shape();
+  const int rank = static_cast<int>(dims.size());
+
+  // Base case: every coordinate is fixed, so emit a single dequantized scalar.
+  if (dim >= rank) {
+    const uint16_t raw = tensor.constAt<uint16_t>(indices);
+    const float real = (static_cast<float>(raw) + offset) * scale;
+    printf("%.4f", real);
+    return;
+  }
+
+  const int32_t extent = dims[dim];
+  const bool innermost = (dim == rank - 1);
+
+  // A dimension longer than kMaxShown is elided in the middle: only kEdge elements are kept at each end.
+  constexpr int32_t kMaxShown = 20;
+  constexpr int32_t kEdge = kMaxShown / 2;
+
+  // Prints elements [first, last) of this dimension. Elements are separated by ", ", followed by a line break
+  // when each element is itself a sub-array.
+  auto emit_range = [&](int32_t first, int32_t last) {
+    for (int32_t i = first; i < last; ++i) {
+      if (i > first) {
+        printf(", ");
+        if (!innermost) printf("\n");
+      }
+      indices.push_back(i);
+      __printDequantizedUInt16TensorData(tensor, dim + 1, indices, scale, offset);
+      indices.pop_back();
+    }
+  };
+
+  printf("[");
+  if (extent <= kMaxShown) {
+    emit_range(0, extent);
+  } else {
+    // Leading edge, ellipsis marker, trailing edge.
+    emit_range(0, kEdge);
+    if (innermost) {
+      printf(", ..., ");
+    } else {
+      printf(",\n...\n");
+    }
+    emit_range(extent - kEdge, extent);
+  }
+  printf("]");
+}
+
+// Debug helper: prints `tensor` dequantized as (raw + offset) * scale; only UInt16-backed dtypes are supported.
+inline void printDequantizedTensor(const mllm::Tensor& tensor, float scale, int32_t offset) {
+  std::vector<int32_t> indices;
+  indices.reserve(tensor.shape().size());
+  printf("Dequantized Tensor (scale=%f, offset=%d):\n", scale, offset);
+  // NOTE(review): kUInt16PerTensorAsy is assumed to use uint16_t storage like kUInt16 - confirm.
+  if (tensor.dtype() == mllm::kUInt16 || tensor.dtype() == mllm::kUInt16PerTensorAsy) {
+    __printDequantizedUInt16TensorData(tensor, 0, indices, scale, offset);
+  } else {
+    printf("Not supported type");
+  }
+  printf("\n");
+}
+
 }  // namespace mllm::qnn
diff --git a/mllm/backends/qnn/aot_rt/PromptProcessor.cpp b/mllm/backends/qnn/aot_rt/PromptProcessor.cpp
index f9eae715..c276fbf0 100644
--- a/mllm/backends/qnn/aot_rt/PromptProcessor.cpp
+++ b/mllm/backends/qnn/aot_rt/PromptProcessor.cpp
@@ -148,7 +148,7 @@ int64_t PromptProcessor<T>::prefill(const std::vector<int64_t>& prompt_tokens, i
     current_pos += chunk_size;
   }
 
-  auto logits = output_tensors_[0].to(kCPU).squeeze(0)[{kAll, (num_tokens + config_.ar_len - 1) % config_.ar_len, kAll}];
+  auto logits = output_tensors_[0].to(kCPU).squeeze(0)[{kAll, ((int)num_tokens + config_.ar_len - 1) % config_.ar_len, kAll}];
 
   auto cur_token = module_->sampleGreedy(logits);
 
diff --git a/mllm/backends/qnn/passes/QNNGraphBuildPass.cpp b/mllm/backends/qnn/passes/QNNGraphBuildPass.cpp
index 60b6b229..76061494 100644
--- a/mllm/backends/qnn/passes/QNNGraphBuildPass.cpp
+++ b/mllm/backends/qnn/passes/QNNGraphBuildPass.cpp
@@ -130,7 +130,8 @@ void QNNGraphBuildPass::buildQnnGraph(const ir::graph::SubGraphOp::ptr_t& sub_gr
                                             QNN_QUANTIZATION_ENCODING_SCALE_OFFSET,
                                             {.scaleOffsetEncoding = {.scale = scale, .offset = 0}}};
     }
-    ModelError_t err = qnn_model->addTensor(input_tensor->name(), QNN_TENSOR_TYPE_APP_WRITE, input_tensor->tensor_, quantize_param);
+    ModelError_t err =
+        qnn_model->addTensor(input_tensor->name(), QNN_TENSOR_TYPE_APP_WRITE, input_tensor->tensor_, quantize_param);
     if (err != MODEL_NO_ERROR) {
       MLLM_ERROR("Failed to add input tensor {} to graph '{}'", input_tensor->name(), graph_name);
       return;
@@ -139,7 +140,6 @@ void QNNGraphBuildPass::buildQnnGraph(const ir::graph::SubGraphOp::ptr_t& sub_gr
 
   // Record MLLM expected output order from ReturnOp
   std::vector<std::string> expectedOutputOrder;
-  ir::cf::ReturnOp::ptr_t return_op = nullptr;
 
   // Process each operation in the subgraph
   for (auto& region_op : graph_region->ops()) {
@@ -164,29 +164,12 @@ void QNNGraphBuildPass::buildQnnGraph(const ir::graph::SubGraphOp::ptr_t& sub_gr
         MLLM_WARN("No pattern registered for op type: {}", optype2Str(op_types));
       }
     } else if (auto ret_op = std::dynamic_pointer_cast<ir::cf::ReturnOp>(region_op)) {
-      // Record ReturnOp to extract expected output order
-      return_op = ret_op;
+      continue;
     } else {
       MLLM_WARN("Unsupported op type in QNN subgraph: {}", (int)region_op->getKind());
     }
   }
 
-  // Extract MLLM expected output order from ReturnOp inputs
-  if (return_op) {
-    for (auto& input : return_op->inputs()) {
-      auto output_tensor = input->cast_<ir::tensor::TensorValue>();
-      if (output_tensor) {
-        expectedOutputOrder.push_back(output_tensor->name());
-      }
-    }
-    // Set expected output order in QNN model
-    qnn_model->setExpectedOutputOrder(expectedOutputOrder);
-    // MLLM_INFO("QNNGraphBuildPass: Recorded MLLM expected output order for graph '{}' with {} outputs", graph_name,
-    //           expectedOutputOrder.size());
-  } else {
-    MLLM_WARN("QNNGraphBuildPass: No ReturnOp found in graph '{}', cannot determine expected output order", graph_name);
-  }
-
   // Finalize the QNN graph
   if (!qnn_backend->graphFinalize(graph_name)) {
     MLLM_ERROR("Failed to finalize QNN graph '{}'", graph_name);

From 2dd6dd3b57fc337a583715219c15a6f122e7bbba Mon Sep 17 00:00:00 2001
From: oreomaker <zh002919@outlook.com>
Date: Sun, 1 Feb 2026 17:31:26 +0800
Subject: [PATCH 4/7] refactor(core): remove comma subscript feature from slice
 operations

---
 mllm/CMakeLists.txt                           | 11 ------
 mllm/core/SlicePrimitives.hpp                 | 38 -------------------
 .../minicpm_o2_6/modeling_resampler.hpp       |  6 +--
 .../minicpm_o2_6/streaming_generation.cpp     |  2 +-
 4 files changed, 4 insertions(+), 53 deletions(-)

diff --git a/mllm/CMakeLists.txt b/mllm/CMakeLists.txt
index ceafc54f..8507df2b 100644
--- a/mllm/CMakeLists.txt
+++ b/mllm/CMakeLists.txt
@@ -56,17 +56,6 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "App
   endif()
 endif()
 
-# FIXME: @oreomaker Need to remove comma features in slice!
-# Suppress comma-subscript warnings (deprecated C++ feature that will be removed in C++26)
-# This flag is only available in Clang 13+ and GCC 10+
-if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
-  target_compile_options(MllmRT PUBLIC -Wno-comma-subscript)
-elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "10.0")
-    target_compile_options(MllmRT PUBLIC -Wno-comma-subscript)
-  endif()
-endif()
-
 # ONLY APPLE CAN DO !
 # Processing OpenMP
 if(MLLM_KERNEL_USE_THREADS AND MLLM_KERNEL_THREADS_VENDOR_OPENMP)
diff --git a/mllm/core/SlicePrimitives.hpp b/mllm/core/SlicePrimitives.hpp
index 59215737..d38e8b43 100644
--- a/mllm/core/SlicePrimitives.hpp
+++ b/mllm/core/SlicePrimitives.hpp
@@ -26,42 +26,4 @@ struct SliceIndicesPair {
 
 using SliceIndices = std::vector<SliceIndicesPair>;
 
-// Helper class for comma operator to enable [1,2,3] syntax
-class SliceIndicesBuilder {
- public:
-  // NOLINT for intentional implicit conversion
-  SliceIndicesBuilder(int32_t first_index) {  // NOLINT(google-explicit-constructor)
-    indices_.emplace_back(first_index);
-  }
-
-  // NOLINT for intentional implicit conversion
-  SliceIndicesBuilder(const SliceIndicesPair& first_pair) {  // NOLINT(google-explicit-constructor)
-    indices_.emplace_back(first_pair);
-  }
-
-  // operator, to chain multiple indices
-  SliceIndicesBuilder operator,(int32_t index) && {
-    indices_.emplace_back(index);
-    return std::move(*this);
-  }
-
-  SliceIndicesBuilder operator,(const SliceIndicesPair& pair) && {
-    indices_.emplace_back(pair);
-    return std::move(*this);
-  }
-
-  // Implicit conversion to SliceIndices - intentional for syntax sugar
-  operator SliceIndices() const {  // NOLINT(google-explicit-constructor)
-    return indices_;
-  }
-
- private:
-  SliceIndices indices_;
-};
-
-// Helper function to start the builder chain
-inline SliceIndicesBuilder make_slice(int32_t index) { return {index}; }
-
-inline SliceIndicesBuilder make_slice(const SliceIndicesPair& pair) { return {pair}; }
-
 }  // namespace mllm
diff --git a/mllm/models/minicpm_o2_6/modeling_resampler.hpp b/mllm/models/minicpm_o2_6/modeling_resampler.hpp
index 69795e5a..f447521b 100644
--- a/mllm/models/minicpm_o2_6/modeling_resampler.hpp
+++ b/mllm/models/minicpm_o2_6/modeling_resampler.hpp
@@ -294,7 +294,7 @@ class Resampler : public nn::Module {
     std::vector<Tensor> outputs;
     for (int32_t b = 0; b < batch_size; ++b) {
       // x for this batch
-      Tensor x_b = x[make_slice(b), kAll, kAll].view({seq_len, embed_dim_});
+      Tensor x_b = x[{b, kAll, kAll}].view({seq_len, embed_dim_});
 
       // pos_embed for this batch
       // Tensor pos_embed_b = Tensor::empty({seq_len, embed_dim_}, kFloat32).alloc();
@@ -308,12 +308,12 @@ class Resampler : public nn::Module {
       //   }
       // }
       // TODO: handle 'set 0'
-      Tensor pos_embed_b = pos_embed_padded[make_slice(b), kAll, kAll].view({seq_len, embed_dim_});
+      Tensor pos_embed_b = pos_embed_padded[{b, kAll, kAll}].view({seq_len, embed_dim_});
 
       auto kv_input = x_b + pos_embed_b;
 
       // key_padding_mask for this batch
-      Tensor key_padding_mask_b = key_padding_mask[make_slice(b), kAll].view({max_patch_len});
+      Tensor key_padding_mask_b = key_padding_mask[{b, kAll}].view({max_patch_len});
 
       bool has_padding = false;
       for (int i = 0; i < seq_len; i++) {
diff --git a/mllm/models/minicpm_o2_6/streaming_generation.cpp b/mllm/models/minicpm_o2_6/streaming_generation.cpp
index b1489470..c4f9902e 100644
--- a/mllm/models/minicpm_o2_6/streaming_generation.cpp
+++ b/mllm/models/minicpm_o2_6/streaming_generation.cpp
@@ -31,7 +31,7 @@ void StreamingGenerator::generate_next(OmniOutput& output) {
   if (spk_embeds_.isNil()) {
     streamer_ = ++streamer_;
 
-    spk_embeds_ = streamer_.getLastHiddenStates()[make_slice(0), spk_start_idx_ + 1, kAll];
+    spk_embeds_ = streamer_.getLastHiddenStates().slice({0, spk_start_idx_ + 1, mllm::kAll});
 
     std::string tts_eos_token = preprocessor::wideString2Utf8String(L"<|tts_eos|>");
     std::string tts_text = streamer_->text;

From 90c69fc4ffe910e4cfb2d51a5ba8e2bc1f6afce9 Mon Sep 17 00:00:00 2001
From: oreomaker <zh002919@outlook.com>
Date: Sun, 1 Feb 2026 17:38:18 +0800
Subject: [PATCH 5/7] feat: add configurable QNN AOT environment path parameter

---
 examples/llama_qnn_aot/compile.cpp     | 5 ++++-
 examples/llama_qnn_aot/compile_sha.cpp | 5 ++++-
 examples/qwen2_qnn_aot/compile.cpp     | 5 ++++-
 examples/qwen2_qnn_aot/compile_sha.cpp | 5 ++++-
 examples/qwen3_qnn_aot/aot_run.cpp     | 4 ----
 examples/qwen3_qnn_aot/compile.cpp     | 5 ++++-
 examples/qwen3_qnn_aot/compile_sha.cpp | 5 ++++-
 7 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/examples/llama_qnn_aot/compile.cpp b/examples/llama_qnn_aot/compile.cpp
index 3568a2f4..a064af95 100644
--- a/examples/llama_qnn_aot/compile.cpp
+++ b/examples/llama_qnn_aot/compile.cpp
@@ -17,6 +17,9 @@ MLLM_MAIN({
   auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
   auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
   auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
+  auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
+                           .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
+                           .help("QNN AOT Environment path.");
 
   Argparse::parse(argc, argv);
 
@@ -47,7 +50,7 @@ MLLM_MAIN({
   model.load(params);
 
   // Create Qnn AOT Model
-  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/",
+  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv(qnn_env_path.get(),
                                                mllm::qnn::aot::parseQcomTargetMachineFromJSONFile(qnn_aot_cfg_files.get()));
 
   // Model length 32.
diff --git a/examples/llama_qnn_aot/compile_sha.cpp b/examples/llama_qnn_aot/compile_sha.cpp
index bd938b7a..bdc66a4a 100644
--- a/examples/llama_qnn_aot/compile_sha.cpp
+++ b/examples/llama_qnn_aot/compile_sha.cpp
@@ -25,6 +25,9 @@ MLLM_MAIN({
   auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
   auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
   auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
+  auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
+                           .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
+                           .help("QNN AOT Environment path.");
 
   Argparse::parse(argc, argv);
 
@@ -73,7 +76,7 @@ MLLM_MAIN({
   model.load(params);
 
   // Create Qnn AOT Model
-  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/",
+  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv(qnn_env_path.get(),
                                                mllm::qnn::aot::parseQcomTargetMachineFromJSONFile(qnn_aot_cfg_files.get()));
 
   // Model length 32.
diff --git a/examples/qwen2_qnn_aot/compile.cpp b/examples/qwen2_qnn_aot/compile.cpp
index 28850196..a5af957b 100644
--- a/examples/qwen2_qnn_aot/compile.cpp
+++ b/examples/qwen2_qnn_aot/compile.cpp
@@ -17,6 +17,9 @@ MLLM_MAIN({
   auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
   auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
   auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
+  auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
+                           .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
+                           .help("QNN AOT Environment path.");
 
   Argparse::parse(argc, argv);
 
@@ -47,7 +50,7 @@ MLLM_MAIN({
   model.load(params);
 
   // Create Qnn AOT Model
-  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/",
+  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv(qnn_env_path.get(),
                                                mllm::qnn::aot::parseQcomTargetMachineFromJSONFile(qnn_aot_cfg_files.get()));
 
   // Model length 32.
diff --git a/examples/qwen2_qnn_aot/compile_sha.cpp b/examples/qwen2_qnn_aot/compile_sha.cpp
index 50aa9b5e..cd0ffcb6 100644
--- a/examples/qwen2_qnn_aot/compile_sha.cpp
+++ b/examples/qwen2_qnn_aot/compile_sha.cpp
@@ -25,6 +25,9 @@ MLLM_MAIN({
   auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
   auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
   auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
+  auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
+                           .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
+                           .help("QNN AOT Environment path.");
 
   Argparse::parse(argc, argv);
 
@@ -73,7 +76,7 @@ MLLM_MAIN({
   model.load(params);
 
   // Create Qnn AOT Model
-  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/",
+  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv(qnn_env_path.get(),
                                                mllm::qnn::aot::parseQcomTargetMachineFromJSONFile(qnn_aot_cfg_files.get()));
 
   // Model length 32.
diff --git a/examples/qwen3_qnn_aot/aot_run.cpp b/examples/qwen3_qnn_aot/aot_run.cpp
index b9bee333..364ed4a0 100644
--- a/examples/qwen3_qnn_aot/aot_run.cpp
+++ b/examples/qwen3_qnn_aot/aot_run.cpp
@@ -43,10 +43,6 @@ MLLM_MAIN({
 
   auto input_tensor = tokenizer.convertMessage({.prompt = prompt_text});
 
-  // DBG:
-  mllm::print(input_tensor["sequence"].shape());
-  mllm::print(input_tensor["sequence"]);
-
   Runner runner(config, &tokenizer);
   if (!runner.load()) {
     std::cerr << "Failed to load model\n";
diff --git a/examples/qwen3_qnn_aot/compile.cpp b/examples/qwen3_qnn_aot/compile.cpp
index cc813fe3..6404af3c 100644
--- a/examples/qwen3_qnn_aot/compile.cpp
+++ b/examples/qwen3_qnn_aot/compile.cpp
@@ -17,6 +17,9 @@ MLLM_MAIN({
   auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
   auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
   auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
+  auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
+                           .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
+                           .help("QNN AOT Environment path.");
 
   Argparse::parse(argc, argv);
 
@@ -47,7 +50,7 @@ MLLM_MAIN({
   model.load(params);
 
   // Create Qnn AOT Model
-  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/",
+  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv(qnn_env_path.get(),
                                                mllm::qnn::aot::parseQcomTargetMachineFromJSONFile(qnn_aot_cfg_files.get()));
 
   // Model length 32.
diff --git a/examples/qwen3_qnn_aot/compile_sha.cpp b/examples/qwen3_qnn_aot/compile_sha.cpp
index f6d25894..9f2629f6 100644
--- a/examples/qwen3_qnn_aot/compile_sha.cpp
+++ b/examples/qwen3_qnn_aot/compile_sha.cpp
@@ -25,6 +25,9 @@ MLLM_MAIN({
   auto& model_path = Argparse::add<std::string>("-m|--model_path").help("Model file path.");
   auto& model_cfg_path = Argparse::add<std::string>("-c|--config").help("Model config file path.");
   auto& qnn_aot_cfg_files = Argparse::add<std::string>("-aot_cfg|--aot_config").help("AOT Config file path.");
+  auto& qnn_env_path = Argparse::add<std::string>("-qnn_env|--qnn_env_path")
+                           .def("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/")
+                           .help("QNN AOT Environment path.");
 
   Argparse::parse(argc, argv);
 
@@ -73,7 +76,7 @@ MLLM_MAIN({
   model.load(params);
 
   // Create Qnn AOT Model
-  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv("/opt/qcom/aistack/qairt/2.41.0.251128/lib/x86_64-linux-clang/",
+  auto qnn_aot_env = mllm::qnn::aot::QnnAOTEnv(qnn_env_path.get(),
                                                mllm::qnn::aot::parseQcomTargetMachineFromJSONFile(qnn_aot_cfg_files.get()));
 
   // Model length 32.

From 14dbec61ccca3d8008970863c250738b86e40b28 Mon Sep 17 00:00:00 2001
From: oreomaker <zh002919@outlook.com>
Date: Sun, 1 Feb 2026 17:38:40 +0800
Subject: [PATCH 6/7] docs(qnn_backend): add AOT execution documentation

---
 docs/qnn_backend/aot_execute.rst | 104 +++++++++++++++++++++++++++++++
 docs/qnn_backend/index.rst       |   2 +-
 2 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 docs/qnn_backend/aot_execute.rst

diff --git a/docs/qnn_backend/aot_execute.rst b/docs/qnn_backend/aot_execute.rst
new file mode 100644
index 00000000..e882389d
--- /dev/null
+++ b/docs/qnn_backend/aot_execute.rst
@@ -0,0 +1,104 @@
+QNN AOT Execution Flow
+================================================================
+
+This document aims to explain the main execution flow of QNN AOT (Ahead-of-Time). This implementation is designed to fully leverage the offline compilation capabilities of the Qualcomm QNN framework to achieve efficient inference of fully integer-quantized Large Language Models (LLMs) on mobile devices, which is the de facto workflow for LLM execution on the Hexagon NPU.
+
+Specifically, our implementation employs a W4A16 quantization scheme. The Key-Value (KV) Cache is quantized to ``uint8``, and the linear weights are quantized using Low-Power Blockwise Quantization (LPBQ).
+
+The implementation of this module was inspired by the `PyTorch ExecuTorch`_ project, especially its `Hybrid Execution Mode`_ designed for the Qualcomm backend, for which we are grateful.
+
+.. _PyTorch ExecuTorch: https://pytorch.org/executorch/
+.. _Hybrid Execution Mode: https://github.com/pytorch/executorch/blob/main/examples/qualcomm/oss_scripts/llama/README.md
+
+Overall Flow
+----------------------------------------------------------------
+
+The QNN AOT execution flow is mainly divided into three stages:
+
+1.  **Model Quantization and Export (Python)**: On the host machine, a Python script is used to quantize the pre-trained floating-point model and export it as a ``.safetensors`` file, which is then converted to the MLLM format (``.mllm``).
+2.  **Offline Compilation (C++)**: On the host machine, a C++ compiler program loads the ``.mllm`` file, invokes the QNN toolchain for model compilation, graph optimization, and quantization parameter adjustment, and finally generates a QNN Context Binary.
+3.  **On-Device Execution (C++)**: On the target device (e.g., a mobile phone), the AOT runner program loads the pre-compiled context binary and executes inference.
+
+
+Detailed Steps
+----------------------------------------------------------------
+
+Taking ``qwen3_qnn_aot`` as an example, the detailed steps are as follows.
+
+1. **Model Quantization and Export**
+
+   First, we need to run a Python script on the host to quantize the model and export it as a ``.safetensors`` file.
+
+   .. code-block:: shell
+
+      cd ./pymllm/backends/qualcomm/transformers/qwen3
+      python train.py --model_path "/your/qwen3/model/path/" --max_length 2048 --num_samples 128 --output_dir "/path/to/output"
+
+   This step generates a key file:
+
+   *   ``model.safetensors``: The quantized model file, saved in the specified output directory.
+
+   Next, convert the exported ``.safetensors`` model to the MLLM format (``.mllm``) using the ``mllm-convertor`` script.
+
+   .. code-block:: shell
+
+      mllm-convertor --input_path /path/to/output/model.safetensors --output_path /path/to/output/qwen3_1.7b.mllm
+
+   This will generate the ``qwen3_1.7b.mllm`` file, which will be used in the subsequent compilation step.
+
+2. **Offline Compilation to Generate QNN Context**
+
+   Next, we use a C++ compiler program (``compile.cpp``) on the host to generate the QNN context. This process invokes the QNN SDK to convert the MLLM IR into a QNN-supported format and performs optimizations.
+
+   Compile and run the ``compile`` program:
+
+   .. code-block:: shell
+
+      # In the mllm-v2 project root directory
+      python task.py tasks/build_x86_qnn_aot.yaml
+
+      # Run the compiler program
+      ./build-qnn-aot/bin/mllm-qwen3-aot-sha-c \
+      -m /path/to/output/qwen3_1.7b.mllm \
+      -c ./examples/qwen3_qnn_aot/config_1.7B.json \
+      --aot_config ./examples/qwen3_qnn_aot/qnn_aot_cfg_1.7B.json
+
+
+   This program reads the ``.mllm`` model file and the quantization recipe, and finally generates a QNN context binary file named ``qwen3-1.7B-lpbq-sha.bin``. This file contains all the information needed to execute inference on the target device.
+
+   .. note::
+      The ``HtpSignedPd`` config in ``qnn_aot_cfg_1.7B.json`` specifies ``QNN_HTP_DEVICE_CONFIG_OPTION_SIGNEDPD`` during QNN initialization, which may cause an "Unsupported config option 2" error in older QNN versions. It is recommended to change this setting in the JSON file to ``HtpUnsignedPd``.
+
+3. **On-Device AOT Inference**
+
+   Finally, we push the generated ``qwen3-1.7B-lpbq-sha.bin`` file and other resources like the tokenizer to the target device. The on-device AOT runner program (``aot_run.cpp``) will load this binary file and execute inference.
+
+   Compile and run the ``aot_run`` program:
+
+   .. code-block:: shell
+
+      # Cross-compile the aot_run program for the target device (e.g., Android)
+      task.py tasks/build_android_qnn.yaml
+
+      # Push compiled context file to the device
+      adb push qwen3-1.7B-lpbq-sha.bin /data/local/tmp/
+      
+      adb push build-android-arm64-v8a-qnn/bin/*.so /data/local/tmp
+      adb push build-android-arm64-v8a-qnn/bin/mllm-qwen3-aot-runner /data/local/tmp
+
+      # Execute on the device
+      adb shell "cd /data/local/tmp && export LD_LIBRARY_PATH=. && \
+      ./mllm-qwen3-aot-runner -m qwen3-1.7B-lpbq-sha.bin \
+      -t qwen3-tokenizer.json -c config_1.7B.json --ar_len 32"
+
+   The AOT runner program loads the ``.bin`` file to initialize the QNN context, then receives input tokens, performs model inference, and outputs the next token, thus realizing the language model generation process.
+
+Hybrid Mode Explanation
+----------------------------------------------------------------
+
+Our QNN AOT implementation adopts a hybrid mode similar to that of ExecuTorch to optimize the efficiency of prompt processing and token generation.
+
+*   **Prefill Phase**: When processing the user's input (Prompt) for the first time, the model calculates and caches the Key-Value (KV) states for all input tokens at once. This phase is computationally intensive but is performed only once.
+*   **Decode Phase**: When generating subsequent tokens, the model takes only the previously generated token as input and uses the cached KV state for computation. This process is computationally light and fast, suitable for token-by-token generation.
+
+In this way, we combine the advantages of batch processing and stream processing to improve overall throughput while ensuring low latency.
diff --git a/docs/qnn_backend/index.rst b/docs/qnn_backend/index.rst
index b7092f93..336ef184 100644
--- a/docs/qnn_backend/index.rst
+++ b/docs/qnn_backend/index.rst
@@ -6,4 +6,4 @@ QNN Backend
 
    setup_env
    core_design
-   qnn_model_convert
+   aot_execute

From 703068ccb54d195265524de6be3906093c1ce7ba Mon Sep 17 00:00:00 2001
From: oreomaker <zh002919@outlook.com>
Date: Mon, 2 Feb 2026 15:07:03 +0800
Subject: [PATCH 7/7] docs(qnn_backend): add environment setup reference and
 improve AOT execution guide

---
 docs/qnn_backend/aot_execute.rst | 25 +++++++++++++++++++++++--
 docs/qnn_backend/setup_env.rst   |  4 ++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/docs/qnn_backend/aot_execute.rst b/docs/qnn_backend/aot_execute.rst
index e882389d..55addfef 100644
--- a/docs/qnn_backend/aot_execute.rst
+++ b/docs/qnn_backend/aot_execute.rst
@@ -1,6 +1,9 @@
 QNN AOT Execution Flow
 ================================================================
 
+.. note::
+   Please refer to the :doc:`Environment Setup <setup_env>` documentation to configure the QNN and Hexagon SDK environments before proceeding.
+
 This document aims to explain the main execution flow of QNN AOT (Ahead-of-Time). This implementation is designed to fully leverage the offline compilation capabilities of the Qualcomm QNN framework to achieve efficient inference of fully integer-quantized Large Language Models (LLMs) on mobile devices, which is the de facto workflow for LLM execution on the Hexagon NPU.
 
 Specifically, our implementation employs a W4A16 quantization scheme. The Key-Value (KV) Cache is quantized to ``uint8``, and the linear weights are quantized using Low-Power Blockwise Quantization (LPBQ).
@@ -32,7 +35,7 @@ Taking ``qwen3_qnn_aot`` as an example, the detailed steps are as follows.
    .. code-block:: shell
 
       cd ./pymllm/backends/qualcomm/transformers/qwen3
-      python train.py --model_path "/your/qwen3/model/path/" --max_length 2048 --num_samples 128 --output_dir "/path/to/output"
+      python train.py --model_path "/your/qwen3/model/path/" --max_length 1024 --num_samples 128 --output_dir "/path/to/output"
 
    This step generates a key file:
 
@@ -41,6 +44,7 @@ Taking ``qwen3_qnn_aot`` as an example, the detailed steps are as follows.
    Next, convert the exported ``.safetensors`` model to the MLLM format (``.mllm``) using the ``mllm-convertor`` script.
 
    .. code-block:: shell
 
+      pip install pymllm
       mllm-convertor --input_path /path/to/output/model.safetensors --output_path /path/to/output/qwen3_1.7b.mllm
 
@@ -78,11 +82,28 @@ Taking ``qwen3_qnn_aot`` as an example, the detailed steps are as follows.
    .. code-block:: shell
 
       # Cross-compile the aot_run program for the target device (e.g., Android)
-      task.py tasks/build_android_qnn.yaml
+      python task.py tasks/build_android_qnn.yaml
 
       # Push compiled context file to the device
       adb push qwen3-1.7B-lpbq-sha.bin /data/local/tmp/
       
+      # Push QNN libraries and Op Packages
+      ANDR_LIB=$QNN_SDK_ROOT/lib/aarch64-android
+      OP_PATH=mllm/backends/qnn/custom-op-package/LLaMAPackage/build
+
+      adb push $ANDR_LIB/libQnnHtp.so /data/local/tmp
+      adb push $ANDR_LIB/libQnnHtpV75Stub.so /data/local/tmp
+      adb push $ANDR_LIB/libQnnHtpPrepare.so /data/local/tmp
+      adb push $ANDR_LIB/libQnnHtpProfilingReader.so /data/local/tmp
+      adb push $ANDR_LIB/libQnnHtpOptraceProfilingReader.so /data/local/tmp
+      adb push $ANDR_LIB/libQnnHtpV75CalculatorStub.so /data/local/tmp
+      adb push $QNN_SDK_ROOT/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so /data/local/tmp
+      adb push $ANDR_LIB/libQnnSystem.so /data/local/tmp
+
+      adb push $OP_PATH/aarch64-android/libQnnLLaMAPackage.so /data/local/tmp/libQnnLLaMAPackage_CPU.so
+      adb push $OP_PATH/hexagon-v75/libQnnLLaMAPackage.so /data/local/tmp/libQnnLLaMAPackage_HTP.so
+
+      # Push mllm runner and libs to device
       adb push build-android-arm64-v8a-qnn/bin/*.so /data/local/tmp
       adb push build-android-arm64-v8a-qnn/bin/mllm-qwen3-aot-runner /data/local/tmp
 
diff --git a/docs/qnn_backend/setup_env.rst b/docs/qnn_backend/setup_env.rst
index 6619a21b..5d6b6712 100644
--- a/docs/qnn_backend/setup_env.rst
+++ b/docs/qnn_backend/setup_env.rst
@@ -98,6 +98,10 @@ Compilation Commands
 
 This will build the necessary QNN op packages for both AArch64 and HVX v75 targets.
 
+.. note::
+   The Hexagon tools version in the Makefile may change. If compilation fails, please update the version number in the Makefile accordingly.
+
+
 Development Tips
 ----------------