Skip to content

Commit 3b1aeda

Browse files
Qualcomm AI Engine Direct - Improve CLI tools (#16012)
### Summary - Update the CLI tool to support input list assignment (e.g., input1:=input1.raw input2:=input2.raw) - Update the CLI tool output structure: original: outputs/output_0_0.pt outputs/output_1_0.pt new: outputs/Result_0/output_0.pt outputs/Result_1/output_0.pt ### Motivation This PR helps the QC CI/QA teams validate the QNN output against goldens. ### Test plan ``` python ${EXECUTORCH_ROOT}/backends/qualcomm/tests/test_qnn_delegate.py -k TestUtilsScript.test_cli_with_input_list_assignment --model <soc_model> --build_folder <build_folder> --host <host> --device <device> ```
1 parent d25cc44 commit 3b1aeda

File tree

5 files changed

+187
-42
lines changed

5 files changed

+187
-42
lines changed

backends/qualcomm/tests/models.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,6 +2042,14 @@ def forward(self, x, y):
20422042
return torch.sub(x, y)
20432043

20442044

2045+
class Sub_y_x_from_x_y(torch.nn.Module):
    """Two-input module whose forward returns ``y - x``.

    The operands are used in the opposite order from the signature
    ``forward(x, y)``. Subtraction is not commutative, so feeding the two
    inputs in the wrong order produces a different result.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, y):
        # Second argument minus first argument (not x - y).
        difference = torch.sub(y, x)
        return difference
2051+
2052+
20452053
class SubAlpha(torch.nn.Module):
20462054
def __init__(self, alpha):
20472055
super().__init__()

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8266,6 +8266,9 @@ def test_export_example(self):
82668266

82678267

82688268
class TestUtilsScript(TestQNN):
8269+
TestQNN.atol = 1e-1
8270+
TestQNN.rtol = 1
8271+
82698272
def required_envs(self, conditions=None) -> bool:
82708273
conditions = [] if conditions is None else conditions
82718274
return all(
@@ -8407,13 +8410,91 @@ def test_cli(self):
84078410
self.target,
84088411
"--device",
84098412
self.device,
8413+
"--host",
8414+
self.host,
8415+
"--build_folder",
8416+
self.build_folder,
8417+
"--input_list",
8418+
f"{tmp_dir}/input_list",
8419+
]
8420+
subprocess.run(cmds, stdout=subprocess.DEVNULL)
8421+
self.assertTrue(os.path.isfile(f"{tmp_dir}/e_out/Result_0/output_0.pt"))
8422+
8423+
def test_cli_with_input_list_assignment(self):
    """Run the CLI quantize/compile/execute flow with a named input list.

    The input list uses the ``name:=path`` assignment syntax. The model
    under test computes ``y - x``, so the on-device output only matches the
    golden output when the named inputs are bound to the right graph inputs.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        sample_input = torch.randn(1, 2, 3, 4)
        sample_input2 = torch.randn(1, 2, 3, 4)
        ep = torch.export.export(
            Sub_y_x_from_x_y(), (sample_input, sample_input2)  # noqa: F405
        )
        torch.export.save(ep, f"{tmp_dir}/sub.pt2")
        torch.save(sample_input, f"{tmp_dir}/input_0_0.pt")
        torch.save(sample_input2, f"{tmp_dir}/input_0_1.pt")
        with open(f"{tmp_dir}/input_list", "w") as f:
            f.write(f"x:={tmp_dir}/input_0_0.pt y:={tmp_dir}/input_0_1.pt\n")

        # quantize
        cmds = [
            "python",
            "-m",
            "examples.qualcomm.util_scripts.cli",
            "quantize",
            "--artifact",
            f"{tmp_dir}/sub.pt2",
            "--output_folder",
            f"{tmp_dir}/q_out",
            "--input_list",
            f"{tmp_dir}/input_list",
        ]
        subprocess.run(cmds, stdout=subprocess.DEVNULL)
        self.assertTrue(os.path.isfile(f"{tmp_dir}/q_out/sub_quantized.pt2"))
        # compile
        cmds = [
            "python",
            "-m",
            "examples.qualcomm.util_scripts.cli",
            "compile",
            "--artifact",
            f"{tmp_dir}/q_out/sub_quantized.pt2",
            "--output_folder",
            f"{tmp_dir}/c_out",
            "--model",
            self.model,
        ]
        subprocess.run(cmds, stdout=subprocess.DEVNULL)
        self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.pte"))
        self.assertTrue(os.path.isfile(f"{tmp_dir}/c_out/sub_quantized.svg"))
        # execute
        cmds = [
            "python",
            "-m",
            "examples.qualcomm.util_scripts.cli",
            "execute",
            "--artifact",
            f"{tmp_dir}/c_out/sub_quantized.pte",
            "--output_folder",
            f"{tmp_dir}/e_out",
            "--model",
            self.model,
            "--target",
            self.target,
            "--device",
            self.device,
            "--build_folder",
            self.build_folder,
            "--input_list",
            f"{tmp_dir}/input_list",
        ]
        # Pass --host exactly once, and only when it is set: a falsy host
        # (e.g. None) must not end up in the argv list handed to
        # subprocess.run, and the flag must not be duplicated.
        if self.host:
            cmds.extend(["--host", self.host])
        subprocess.run(cmds, stdout=subprocess.DEVNULL)
        output_file = f"{tmp_dir}/e_out/Result_0/output_0.pt"
        self.assertTrue(os.path.isfile(output_file))
        device_output = torch.load(output_file, weights_only=True)
        golden_output = ep.module()(sample_input, sample_input2)
        self._assert_outputs_equal(golden_output, device_output)
84178498

84188499

84198500
def setup_environment():

examples/qualcomm/executor_runner/qnn_executor_runner.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,11 @@ int main(int argc, char** argv) {
424424
int inference_index = 0;
425425
double elapsed_time = 0;
426426
while (std::getline(input_list, file_path)) {
427+
// to avoid case where \r\n is used as EOL
428+
if (!file_path.empty() && file_path.back() == '\r') {
429+
file_path.pop_back();
430+
}
431+
427432
auto input_files = split(file_path, " ");
428433
if (input_files.size() == 0) {
429434
break;

examples/qualcomm/util_scripts/cli.py

Lines changed: 78 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7+
# This tool supports the QC internal QA pipeline by quantizing, compiling,
8+
# and executing models under various configuration flags.
9+
710
import argparse
811
import importlib
912
import logging
1013
import os
1114
import re
15+
import shutil
1216
from pathlib import Path
1317

1418
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor
@@ -34,16 +38,14 @@
3438
to_edge_transform_and_lower_to_qnn,
3539
)
3640
from executorch.examples.qualcomm.qaihub_scripts.utils.utils import preprocess_binary
37-
from executorch.examples.qualcomm.utils import (
38-
make_output_dir,
39-
make_quantizer,
40-
SimpleADB,
41-
)
41+
from executorch.examples.qualcomm.utils import make_quantizer, SimpleADB
4242
from executorch.exir import ExecutorchBackendConfig
4343
from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
4444
from torchao.quantization import pt2e
4545
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e
4646

47+
INPUT_ORDER = "input_order"
48+
4749

4850
def get_logger():
4951
logger = logging.getLogger("examples.qualcomm.util_scripts.cli")
@@ -74,6 +76,7 @@ def fill_tensor_info(info, qnn_tensors, category):
7476
"offset": encoding.data["offset"].tolist(),
7577
"axis": encoding.axis,
7678
}
79+
7780
info[category].append(
7881
{
7982
"name": tensor.GetName(),
@@ -106,6 +109,26 @@ def fill_tensor_info(info, qnn_tensors, category):
106109
return tensor_info
107110

108111

112+
class InputListParser:
    """Iterate over the samples described by an input-list file.

    Every non-blank line of the file describes one sample as
    whitespace-separated entries, in one of two forms:

    * positional: ``path_a path_b ...`` yields a ``list`` of tensors in
      file order;
    * named: ``name_a:=path_a name_b:=path_b ...`` yields a ``dict``
      mapping each name to its tensor.

    Tensors are loaded with ``torch.load(..., weights_only=True)``.

    Args:
        input_list: path of the input-list file.

    Raises:
        ValueError: when a line that uses ``:=`` contains an entry that is
            not of the form ``name:=path``.
    """

    def __init__(self, input_list):
        self.input_list = input_list

    def __iter__(self):
        with open(self.input_list, "r") as f:
            for line in re.split(r"\r?\n", f.read()):
                # split() drops leading/trailing blanks and collapses runs
                # of whitespace, so blank or whitespace-only lines and
                # doubled separators never produce empty paths.
                entries = line.split()
                if not entries:
                    continue
                if any(":=" in entry for entry in entries):
                    inputs = {}
                    for entry in entries:
                        # partition keeps any later ":=" inside the path
                        name, sep, path = entry.partition(":=")
                        if not (sep and name and path):
                            raise ValueError(
                                f"malformed input assignment {entry!r} in "
                                f"{self.input_list}; expected name:=path"
                            )
                        inputs[name] = torch.load(path, weights_only=True)
                else:
                    inputs = [torch.load(p, weights_only=True) for p in entries]
                yield inputs
130+
131+
109132
def quantize(args):
110133
logger = get_logger()
111134

@@ -131,15 +154,21 @@ def quantize(args):
131154
ep_prepared = prepare_pt2e(ep.module(), quantizer)
132155
logger.info(f"perform calibration on {args.artifact}")
133156
# step 2: perform calibration
134-
with open(args.input_list, "r") as f:
135-
for line in f.read().split("\n")[:-1]:
136-
inputs = [torch.load(t, weights_only=True) for t in line.split(" ")]
137-
ep_prepared(*inputs)
157+
input_list_parser = InputListParser(args.input_list)
158+
graph_input_names = [
159+
spec.arg.name
160+
for spec in ep.graph_signature.input_specs
161+
if spec.kind.name == "USER_INPUT"
162+
]
163+
for inputs in input_list_parser:
164+
if isinstance(inputs, dict):
165+
inputs = [inputs[name] for name in graph_input_names]
166+
ep_prepared(*inputs)
138167
# step 3: use convert_pt2e to fix encodings of QDQ pairs
139168
logger.info(f"saving calibrated model for {args.artifact}")
140169
ep_converted = convert_pt2e(ep_prepared)
141170
ep_quantized = torch.export.export(ep_converted, tuple(inputs))
142-
make_output_dir(args.output_folder)
171+
os.makedirs(args.output_folder, exist_ok=True)
143172
torch.export.save(
144173
ep_quantized, f"{args.output_folder}/{Path(args.artifact).stem}_quantized.pt2"
145174
)
@@ -155,7 +184,7 @@ def compile(args):
155184
)
156185

157186
file_name, extension = Path(args.artifact).stem, Path(args.artifact).suffix
158-
make_output_dir(args.output_folder)
187+
os.makedirs(args.output_folder, exist_ok=True)
159188
# setup compiler spec dedicated to QNN HTP backend
160189
backend_options = generate_htp_compiler_spec(use_fp16=True)
161190
# setup general compiler spec for QNN
@@ -201,12 +230,13 @@ def compile(args):
201230

202231
for user_pass in user_passes:
203232
passes[user_pass][QCOM_PASS_ACTIVATE_KEY] = True
204-
233+
input_order = {INPUT_ORDER: ep.graph_signature.user_inputs}
205234
edge_prog_mgr = to_edge_transform_and_lower_to_qnn(
206235
module=ep.module(),
207236
inputs=sample_inputs,
208237
compiler_specs=compiler_specs,
209238
passes_job=passes,
239+
constant_methods=input_order,
210240
)
211241
# step 2: write pte files and store final graph
212242
logger.info(f"exporting {file_name}.pte")
@@ -227,15 +257,30 @@ def execute(args):
227257

228258
pte_name = Path(args.artifact).stem
229259

260+
# get input order
261+
from executorch.runtime import Runtime, Verification
262+
263+
et_runtime = Runtime.get()
264+
program = et_runtime.load_program(
265+
args.artifact,
266+
verification=Verification.Minimal,
267+
)
268+
input_order_func = program.load_method(INPUT_ORDER)
269+
input_order = input_order_func.execute([])
270+
230271
# load input files
231272
logger.info("loading user inputs")
273+
input_list_parser = InputListParser(args.input_list)
232274
user_inputs = []
233-
with open(args.input_list, "r") as f:
234-
for line in f.read().split("\n")[:-1]:
235-
inputs, input_names = [], ""
236-
for data in line.split(" "):
237-
input_names += f"{Path(data).stem}.raw "
238-
inputs.append(torch.load(data, weights_only=True))
275+
for inputs in input_list_parser:
276+
if isinstance(inputs, dict):
277+
ordered_inputs = []
278+
# input_order is read back from the program's INPUT_ORDER method;
279+
# use it to put the named input assignments into that order
280+
for name in input_order:
281+
ordered_inputs.append(inputs[name])
282+
user_inputs.append(ordered_inputs)
283+
else:
239284
user_inputs.append(inputs)
240285

241286
logger.info("retrieving graph I/O")
@@ -247,7 +292,6 @@ def execute(args):
247292
backend_options=backend_options,
248293
)
249294
io_info = get_io_info(args.artifact, compiler_specs)
250-
251295
logger.info("preparing ADB connection")
252296
# leverage SimpleADB for e2e inference
253297
adb = SimpleADB(
@@ -263,11 +307,16 @@ def execute(args):
263307
)
264308

265309
logger.info("pushing QNN libraries & other artifacts")
310+
266311
adb.push(inputs=user_inputs)
267312

268313
logger.info("starting inference")
269314
adb.execute()
270315

316+
tmp_dir = f"{args.output_folder}/tmp_outputs"
317+
os.makedirs(tmp_dir, exist_ok=True)
318+
os.makedirs(args.output_folder, exist_ok=True)
319+
271320
def post_process():
272321
torch_to_numpy_dtype_dict = {
273322
torch.bool: np.dtype("bool"),
@@ -283,11 +332,14 @@ def post_process():
283332
torch.complex128: np.dtype("complex128"),
284333
}
285334
output_info = io_info["outputs"]
286-
output_folder = f"{args.output_folder}/outputs"
287-
for _, f in enumerate(os.listdir(output_folder)):
288-
filename = os.path.join(output_folder, f)
289-
match_res = re.match(r".*([0-9]+)_([0-9]+)\.raw$", filename)
335+
tmp_output_folder = f"{tmp_dir}/outputs"
336+
for _, f in enumerate(os.listdir(tmp_output_folder)):
337+
filename = os.path.join(tmp_output_folder, f)
338+
match_res = re.match(r".*output_([0-9]+)_([0-9]+)\.raw$", filename)
290339
data_index, output_index = int(match_res.group(1)), int(match_res.group(2))
340+
341+
output_result_folder = f"{args.output_folder}/Result_{data_index}"
342+
os.makedirs(output_result_folder, exist_ok=True)
291343
output = np.fromfile(
292344
filename,
293345
dtype=eval(
@@ -297,13 +349,11 @@ def post_process():
297349
output = torch.from_numpy(
298350
output.reshape(output_info[output_index]["shape"])
299351
)
300-
torch.save(
301-
output, f"{args.output_folder}/output_{data_index}_{output_index}.pt"
302-
)
352+
torch.save(output, f"{output_result_folder}/output_{output_index}.pt")
303353

304354
logger.info("collecting output data")
305-
make_output_dir(args.output_folder)
306-
adb.pull(args.output_folder, post_process)
355+
adb.pull(tmp_dir, post_process)
356+
shutil.rmtree(tmp_dir)
307357
logger.info(f"execution finished, please check {args.output_folder} for results")
308358

309359

examples/qualcomm/utils.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -148,23 +148,23 @@ def push(self, inputs=None, input_list=None, files=None, init_env=True):
148148
f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so",
149149
f"{self.qnn_sdk}/lib/{self.target}/libQnnModelDlc.so",
150150
]
151-
input_list_file, input_files = generate_inputs(
152-
self.working_dir, self.input_list_filename, inputs
153-
)
151+
with tempfile.TemporaryDirectory() as tmp_dir:
152+
input_list_file, input_files = generate_inputs(
153+
tmp_dir, self.input_list_filename, inputs
154+
)
154155

155-
if input_list_file is not None:
156-
# prepare input list
157-
artifacts.append(input_list_file)
156+
if input_list_file is not None:
157+
# prepare input list
158+
artifacts.append(input_list_file)
158159

159-
for artifact in artifacts:
160-
self._adb(["push", artifact, self.workspace])
160+
for artifact in artifacts:
161+
self._adb(["push", artifact, self.workspace])
161162

162-
# input data
163-
for file_name in input_files:
164-
self._adb(["push", file_name, self.workspace])
163+
# input data
164+
for file_name in input_files:
165+
self._adb(["push", file_name, self.workspace])
165166

166-
# dynamic shape related
167-
with tempfile.TemporaryDirectory() as tmp_dir:
167+
# dynamic shape related
168168
if self.expected_input_shape and self.expected_output_shape:
169169
shape_info = {
170170
"input_shape": self.expected_input_shape,
@@ -956,6 +956,7 @@ def prepare_input_file(tensor, fd, index, sub_index):
956956
# Prepare input data
957957
if inputs is not None:
958958
input_list_file = f"{dest_path}/{file_name}"
959+
959960
with open(input_list_file, "w") as f:
960961
for idx, data in enumerate(inputs):
961962
sub_index = 0

0 commit comments

Comments
 (0)