From 16dbbe72184522a04b873740ccb3e22fb9dbad69 Mon Sep 17 00:00:00 2001 From: Sharvari Medhe Date: Thu, 21 Aug 2025 03:55:27 +0000 Subject: [PATCH 1/7] fixed isort changes Signed-off-by: Sharvari Medhe --- QEfficient/__init__.py | 72 +++++++------- QEfficient/generation/cloud_infer.py | 139 +++++++++++++++------------ 2 files changed, 113 insertions(+), 98 deletions(-) diff --git a/QEfficient/__init__.py b/QEfficient/__init__.py index be4b86321..8e824b488 100644 --- a/QEfficient/__init__.py +++ b/QEfficient/__init__.py @@ -8,19 +8,50 @@ import os import warnings -from QEfficient.utils import custom_format_warning - # For faster downloads via hf_transfer # This code is put above import statements as this needs to be executed before -# hf_transfer is imported (will happen on line 15 via leading imports) +# hf_transfer is imported (will happen on line 14 via leading imports) os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + # Placeholder for all non-transformer models registered in QEfficient import QEfficient.utils.model_registery # noqa: F401 +from QEfficient.base import ( + QEFFAutoModel, + QEFFAutoModelForCausalLM, + QEFFAutoModelForImageTextToText, + QEFFAutoModelForSpeechSeq2Seq, + QEFFCommonLoader, +) +from QEfficient.compile.compile_helper import compile +from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter +from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv +from QEfficient.peft import QEffAutoPeftModelForCausalLM +from QEfficient.transformers.transform import transform +from QEfficient.utils import custom_format_warning from QEfficient.utils.logging_utils import logger # custom warning for the better logging experience warnings.formatwarning = custom_format_warning +# Conditionally import QAIC-related modules if the SDK is installed +__version__ = "0.0.1.dev0" + +# Users can use QEfficient.export for exporting models to ONNX +export = qualcomm_efficient_converter + +__all__ = [ + "transform", + "export", + "compile", + "cloud_ai_100_exec_kv", + "QEFFAutoModel", + "QEFFAutoModelForCausalLM", + "QEffAutoPeftModelForCausalLM", + "QEFFAutoModelForImageTextToText", + "QEFFAutoModelForSpeechSeq2Seq", + "QEFFCommonLoader", +] + def check_qaic_sdk(): """Check if QAIC SDK is installed""" @@ -36,38 +67,5 @@ def check_qaic_sdk(): return False -# Conditionally import QAIC-related modules if the SDK is installed -__version__ = "0.0.1.dev0" - -if check_qaic_sdk(): - from QEfficient.base import ( - QEFFAutoModel, - QEFFAutoModelForCausalLM, - QEFFAutoModelForImageTextToText, - QEFFAutoModelForSpeechSeq2Seq, - QEFFCommonLoader, - ) - from QEfficient.compile.compile_helper import compile - from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter - from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv - from QEfficient.peft import QEffAutoPeftModelForCausalLM - from QEfficient.transformers.transform import transform - - # Users can use QEfficient.export for exporting models to ONNX - export = qualcomm_efficient_converter - - __all__ = [ - "transform", - "export", - "compile", - "cloud_ai_100_exec_kv", - "QEFFAutoModel", - "QEFFAutoModelForCausalLM", - "QEffAutoPeftModelForCausalLM", - "QEFFAutoModelForImageTextToText", - "QEFFAutoModelForSpeechSeq2Seq", - "QEFFCommonLoader", - ] - -else: +if not check_qaic_sdk(): logger.warning("QAIC SDK is not installed, eager mode features won't be available!") diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py index 8519d824c..062945acc 100644 --- a/QEfficient/generation/cloud_infer.py +++ b/QEfficient/generation/cloud_infer.py @@ -5,43 +5,40 @@ # # ----------------------------------------------------------------------------- +import importlib +import platform +import sys from pathlib import Path from typing import Dict, List, Optional, Union from warnings import warn import numpy as np -try: - import qaicrt -except ImportError: - import platform - import sys - sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}") - import qaicrt - -try: - import QAicApi_pb2 as aicapi -except ImportError: - import sys - - sys.path.append("/opt/qti-aic/dev/python") - import QAicApi_pb2 as aicapi +class QAICInferenceSession: + _qaicrt = None + _aicapi = None -aic_to_np_dtype_mapping = { - aicapi.FLOAT_TYPE: np.dtype(np.float32), - aicapi.FLOAT_16_TYPE: np.dtype(np.float16), - aicapi.INT8_Q_TYPE: np.dtype(np.int8), - aicapi.UINT8_Q_TYPE: np.dtype(np.uint8), - aicapi.INT16_Q_TYPE: np.dtype(np.int16), - aicapi.INT32_Q_TYPE: np.dtype(np.int32), - aicapi.INT32_I_TYPE: np.dtype(np.int32), - aicapi.INT64_I_TYPE: np.dtype(np.int64), - aicapi.INT8_TYPE: np.dtype(np.int8), -} + @property + def qaicrt(self): + if QAICInferenceSession._qaicrt is None: + try: + QAICInferenceSession._qaicrt = importlib.import_module("qaicrt") + except ImportError: + sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}") + QAICInferenceSession._qaicrt = importlib.import_module("qaicrt") + return QAICInferenceSession._qaicrt + @property + def aicapi(self): + if QAICInferenceSession._aicapi is None: + try: + QAICInferenceSession._aicapi = importlib.import_module("QAicApi_pb2") + except ImportError: + sys.path.append("/opt/qti-aic/dev/python") + QAICInferenceSession._aicapi = importlib.import_module("QAicApi_pb2") + return QAICInferenceSession._aicapi -class QAICInferenceSession: def __init__( self, qpc_path: Union[Path, str], @@ -58,59 +55,81 @@ def __init__( :activate: bool. If false, activation will be disabled. Default=True. :enable_debug_logs: bool. If True, It will enable debug logs. Default=False. """ + + # Build the dtype map one time, not on every property access + self.aic_to_np_dtype_mapping = { + self.aicapi.FLOAT_TYPE: np.dtype(np.float32), + self.aicapi.FLOAT_16_TYPE: np.dtype(np.float16), + self.aicapi.INT8_Q_TYPE: np.dtype(np.int8), + self.aicapi.UINT8_Q_TYPE: np.dtype(np.uint8), + self.aicapi.INT16_Q_TYPE: np.dtype(np.int16), + self.aicapi.INT32_Q_TYPE: np.dtype(np.int32), + self.aicapi.INT32_I_TYPE: np.dtype(np.int32), + self.aicapi.INT64_I_TYPE: np.dtype(np.int64), + self.aicapi.INT8_TYPE: np.dtype(np.int8), + } + # Load QPC if device_ids is not None: - devices = qaicrt.QIDList(device_ids) - self.context = qaicrt.Context(devices) - self.queue = qaicrt.Queue(self.context, device_ids[0]) + devices = self.qaicrt.QIDList(device_ids) + self.context = self.qaicrt.Context(devices) + self.queue = self.qaicrt.Queue(self.context, device_ids[0]) else: - self.context = qaicrt.Context() - self.queue = qaicrt.Queue(self.context, 0) # Async API + self.context = self.qaicrt.Context() + self.queue = self.qaicrt.Queue(self.context, 0) # Async API + if enable_debug_logs: - if self.context.setLogLevel(qaicrt.QLogLevel.QL_DEBUG) != qaicrt.QStatus.QS_SUCCESS: + if self.context.setLogLevel(self.qaicrt.QLogLevel.QL_DEBUG) != self.qaicrt.QStatus.QS_SUCCESS: raise RuntimeError("Failed to setLogLevel") - qpc = qaicrt.Qpc(str(qpc_path)) + + qpc = self.qaicrt.Qpc(str(qpc_path)) + # Load IO Descriptor - iodesc = aicapi.IoDesc() + iodesc = self.aicapi.IoDesc() status, iodesc_data = qpc.getIoDescriptor() - if status != qaicrt.QStatus.QS_SUCCESS: + if status != self.qaicrt.QStatus.QS_SUCCESS: raise RuntimeError("Failed to getIoDescriptor") iodesc.ParseFromString(bytes(iodesc_data)) + self.allowed_shapes = [ - [(aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes] + [(self.aic_to_np_dtype_mapping[x.type].itemsize, list(x.dims)) for x in allowed_shape.shapes] for allowed_shape in iodesc.allowed_shapes ] self.bindings = iodesc.selected_set.bindings self.binding_index_map = {binding.name: binding.index for binding in self.bindings} + # Create and load Program - prog_properties = qaicrt.QAicProgramProperties() + prog_properties = self.qaicrt.QAicProgramProperties() prog_properties.SubmitRetryTimeoutMs = 60_000 if device_ids and len(device_ids) > 1: prog_properties.devMapping = ":".join(map(str, device_ids)) - self.program = qaicrt.Program(self.context, None, qpc, prog_properties) - if self.program.load() != qaicrt.QStatus.QS_SUCCESS: + + self.program = self.qaicrt.Program(self.context, None, qpc, prog_properties) + if self.program.load() != self.qaicrt.QStatus.QS_SUCCESS: raise RuntimeError("Failed to load program") + if activate: self.activate() + # Create input qbuffers and buf_dims - self.qbuffers = [qaicrt.QBuffer(bytes(binding.size)) for binding in self.bindings] - self.buf_dims = qaicrt.BufferDimensionsVecRef( - [(aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings] + self.qbuffers = [self.qaicrt.QBuffer(bytes(binding.size)) for binding in self.bindings] + self.buf_dims = self.qaicrt.BufferDimensionsVecRef( + [(self.aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings] ) @property def input_names(self) -> List[str]: - return [binding.name for binding in self.bindings if binding.dir == aicapi.BUFFER_IO_TYPE_INPUT] + return [binding.name for binding in self.bindings if binding.dir == self.aicapi.BUFFER_IO_TYPE_INPUT] @property def output_names(self) -> List[str]: - return [binding.name for binding in self.bindings if binding.dir == aicapi.BUFFER_IO_TYPE_OUTPUT] + return [binding.name for binding in self.bindings if binding.dir == self.aicapi.BUFFER_IO_TYPE_OUTPUT] def activate(self): """Activate qpc""" self.program.activate() - self.execObj = qaicrt.ExecObj(self.context, self.program) + self.execObj = self.qaicrt.ExecObj(self.context, self.program) def deactivate(self): """Deactivate qpc""" @@ -131,7 +150,7 @@ def set_buffers(self, buffers: Dict[str, np.ndarray]): warn(f'Buffer: "{buffer_name}" not found') continue buffer_index = self.binding_index_map[buffer_name] - self.qbuffers[buffer_index] = qaicrt.QBuffer(buffer.tobytes()) + self.qbuffers[buffer_index] = self.qaicrt.QBuffer(buffer.tobytes()) self.buf_dims[buffer_index] = ( buffer.itemsize, buffer.shape if len(buffer.shape) > 0 else (1,), @@ -157,21 +176,19 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: Return: :Dict[str, np.ndarray]: """ - # Set inputs + self.set_buffers(inputs) - if self.execObj.setData(self.qbuffers, self.buf_dims) != qaicrt.QStatus.QS_SUCCESS: + if self.execObj.setData(self.qbuffers, self.buf_dims) != self.qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to setData") - # # Run with sync API - # if self.execObj.run(self.qbuffers) != qaicrt.QStatus.QS_SUCCESS: - # Run with async API - if self.queue.enqueue(self.execObj) != qaicrt.QStatus.QS_SUCCESS: + + if self.queue.enqueue(self.execObj) != self.qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to enqueue") - if self.execObj.waitForCompletion() != qaicrt.QStatus.QS_SUCCESS: + + if self.execObj.waitForCompletion() != self.qaicrt.QStatus.QS_SUCCESS: error_message = "Failed to run" - # Print additional error messages for unmatched dimension error + if self.allowed_shapes: - error_message += "\n\n" - error_message += '(Only if "No matching dimension found" error is present above)' + error_message += "\n\n(Only if 'No matching dimension found' error is present above)" error_message += "\nAllowed shapes:" for i, allowed_shape in enumerate(self.allowed_shapes): error_message += f"\n{i}\n" @@ -189,11 +206,11 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: continue error_message += f"{binding.name}:\t{elemsize}\t{shape}\n" raise ValueError(error_message) - # Get output buffers + status, output_qbuffers = self.execObj.getData() - if status != qaicrt.QStatus.QS_SUCCESS: + if status != self.qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to getData") - # Build output + outputs = {} for output_name in self.output_names: buffer_index = self.binding_index_map[output_name] @@ -201,6 +218,6 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: continue outputs[output_name] = np.frombuffer( bytes(output_qbuffers[buffer_index]), - aic_to_np_dtype_mapping[self.bindings[buffer_index].type], + self.aic_to_np_dtype_mapping[self.bindings[buffer_index].type], ).reshape(self.buf_dims[buffer_index][1]) return outputs From 5b89048cbcbbdf9d4e8b590eb35263e288f090f4 Mon Sep 17 00:00:00 2001 From: Sharvari Medhe Date: Tue, 26 Aug 2025 07:44:02 +0000 Subject: [PATCH 2/7] removed class level variables for modules Signed-off-by: Sharvari Medhe --- QEfficient/generation/cloud_infer.py | 44 ++++++++++++---------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py index 062945acc..351d7f014 100644 --- a/QEfficient/generation/cloud_infer.py +++ b/QEfficient/generation/cloud_infer.py @@ -8,6 +8,7 @@ import importlib import platform import sys +from functools import cached_property from pathlib import Path from typing import Dict, List, Optional, Union from warnings import warn @@ -16,28 +17,21 @@ class QAICInferenceSession: - _qaicrt = None - _aicapi = None - - @property + @cached_property def qaicrt(self): - if QAICInferenceSession._qaicrt is None: - try: - QAICInferenceSession._qaicrt = importlib.import_module("qaicrt") - except ImportError: - sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}") - QAICInferenceSession._qaicrt = importlib.import_module("qaicrt") - return QAICInferenceSession._qaicrt + try: + return importlib.import_module("qaicrt") + except ImportError: + sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}") + return importlib.import_module("qaicrt") - @property + @cached_property def aicapi(self): - if QAICInferenceSession._aicapi is None: - try: - QAICInferenceSession._aicapi = importlib.import_module("QAicApi_pb2") - except ImportError: - sys.path.append("/opt/qti-aic/dev/python") - QAICInferenceSession._aicapi = importlib.import_module("QAicApi_pb2") - return QAICInferenceSession._aicapi + try: + return importlib.import_module("QAicApi_pb2") + except ImportError: + sys.path.append("/opt/qti-aic/dev/python") + return importlib.import_module("QAicApi_pb2") def __init__( self, @@ -48,15 +42,14 @@ def __init__( ): """ Initialise for QAIC inference Session - --------- - :qpc_path: str. Path to the save generated binary file after compilation. - :device_ids: List[int]. Device Ids to be used for compilation. if devices > 1, it enables multiple card setup. - :activate: bool. If false, activation will be disabled. Default=True. - :enable_debug_logs: bool. If True, It will enable debug logs. Default=False. + :param qpc_path: Path to the saved compiled QPC binary. + :param device_ids: Device IDs to be used; if > 1, enables multi-card setup. + :param activate: If False, activation will be skipped. Default=True. + :param enable_debug_logs: If True, enable debug logs. Default=False. """ - # Build the dtype map one time, not on every property access + # Build dtype mapping once (depends on self.aicapi constants) self.aic_to_np_dtype_mapping = { self.aicapi.FLOAT_TYPE: np.dtype(np.float32), self.aicapi.FLOAT_16_TYPE: np.dtype(np.float16), @@ -68,7 +61,6 @@ def __init__( self.aicapi.INT64_I_TYPE: np.dtype(np.int64), self.aicapi.INT8_TYPE: np.dtype(np.int8), } - # Load QPC if device_ids is not None: devices = self.qaicrt.QIDList(device_ids) From e9c3bd3300a8d7bef37383f8e3c129fe38a00344 Mon Sep 17 00:00:00 2001 From: Sharvari Medhe Date: Wed, 3 Sep 2025 15:43:16 +0000 Subject: [PATCH 3/7] removed the functions and added import flag instead, for cleaner, more readable code Signed-off-by: Sharvari Medhe --- QEfficient/generation/cloud_infer.py | 107 ++++++++++++++------------- 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py index 351d7f014..8c9db267e 100644 --- a/QEfficient/generation/cloud_infer.py +++ b/QEfficient/generation/cloud_infer.py @@ -8,30 +8,32 @@ import importlib import platform import sys -from functools import cached_property from pathlib import Path from typing import Dict, List, Optional, Union -from warnings import warn - +from warnings import warn import numpy as np +try: + import qaicrt + is_qaicrt_imported = True +except ImportError: + try: + sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}") + import qaicrt + is_qaicrt_imported = True + except ImportError: + is_qaicrt_imported = False + +try: + import QAicApi_pb2 as aicapi +except ImportError: + try: + sys.path.append("/opt/qti-aic/dev/python") + import QAicApi_pb2 as aicapi + except ImportError: + is_aicapi_imported = False class QAICInferenceSession: - @cached_property - def qaicrt(self): - try: - return importlib.import_module("qaicrt") - except ImportError: - sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}") - return importlib.import_module("qaicrt") - - @cached_property - def aicapi(self): - try: - return importlib.import_module("QAicApi_pb2") - except ImportError: - sys.path.append("/opt/qti-aic/dev/python") - return importlib.import_module("QAicApi_pb2") def __init__( self, @@ -48,38 +50,41 @@ def __init__( :param activate: If False, activation will be skipped. Default=True. :param enable_debug_logs: If True, enable debug logs. Default=False. """ - - # Build dtype mapping once (depends on self.aicapi constants) + if not is_qaicrt_imported and not is_aicapi_imported: + raise ImportError( + "QAIC runtime not available. Please install QAIC SDK" + ) + # Build dtype mapping once (depends on aicapi constants) self.aic_to_np_dtype_mapping = { - self.aicapi.FLOAT_TYPE: np.dtype(np.float32), - self.aicapi.FLOAT_16_TYPE: np.dtype(np.float16), - self.aicapi.INT8_Q_TYPE: np.dtype(np.int8), - self.aicapi.UINT8_Q_TYPE: np.dtype(np.uint8), - self.aicapi.INT16_Q_TYPE: np.dtype(np.int16), - self.aicapi.INT32_Q_TYPE: np.dtype(np.int32), - self.aicapi.INT32_I_TYPE: np.dtype(np.int32), - self.aicapi.INT64_I_TYPE: np.dtype(np.int64), - self.aicapi.INT8_TYPE: np.dtype(np.int8), + aicapi.FLOAT_TYPE: np.dtype(np.float32), + aicapi.FLOAT_16_TYPE: np.dtype(np.float16), + aicapi.INT8_Q_TYPE: np.dtype(np.int8), + aicapi.UINT8_Q_TYPE: np.dtype(np.uint8), + aicapi.INT16_Q_TYPE: np.dtype(np.int16), + aicapi.INT32_Q_TYPE: np.dtype(np.int32), + aicapi.INT32_I_TYPE: np.dtype(np.int32), + aicapi.INT64_I_TYPE: np.dtype(np.int64), + aicapi.INT8_TYPE: np.dtype(np.int8), } # Load QPC if device_ids is not None: - devices = self.qaicrt.QIDList(device_ids) - self.context = self.qaicrt.Context(devices) - self.queue = self.qaicrt.Queue(self.context, device_ids[0]) + devices = qaicrt.QIDList(device_ids) + self.context = qaicrt.Context(devices) + self.queue = qaicrt.Queue(self.context, device_ids[0]) else: - self.context = self.qaicrt.Context() - self.queue = self.qaicrt.Queue(self.context, 0) # Async API + self.context = qaicrt.Context() + self.queue = qaicrt.Queue(self.context, 0) # Async API if enable_debug_logs: - if self.context.setLogLevel(self.qaicrt.QLogLevel.QL_DEBUG) != self.qaicrt.QStatus.QS_SUCCESS: + if self.context.setLogLevel(qaicrt.QLogLevel.QL_DEBUG) != qaicrt.QStatus.QS_SUCCESS: raise RuntimeError("Failed to setLogLevel") - qpc = self.qaicrt.Qpc(str(qpc_path)) + qpc = qaicrt.Qpc(str(qpc_path)) # Load IO Descriptor - iodesc = self.aicapi.IoDesc() + iodesc = aicapi.IoDesc() status, iodesc_data = qpc.getIoDescriptor() - if status != self.qaicrt.QStatus.QS_SUCCESS: + if status != qaicrt.QStatus.QS_SUCCESS: raise RuntimeError("Failed to getIoDescriptor") iodesc.ParseFromString(bytes(iodesc_data)) @@ -91,37 +96,37 @@ def __init__( self.binding_index_map = {binding.name: binding.index for binding in self.bindings} # Create and load Program - prog_properties = self.qaicrt.QAicProgramProperties() + prog_properties = qaicrt.QAicProgramProperties() prog_properties.SubmitRetryTimeoutMs = 60_000 if device_ids and len(device_ids) > 1: prog_properties.devMapping = ":".join(map(str, device_ids)) - self.program = self.qaicrt.Program(self.context, None, qpc, prog_properties) - if self.program.load() != self.qaicrt.QStatus.QS_SUCCESS: + self.program = qaicrt.Program(self.context, None, qpc, prog_properties) + if self.program.load() != qaicrt.QStatus.QS_SUCCESS: raise RuntimeError("Failed to load program") if activate: self.activate() # Create input qbuffers and buf_dims - self.qbuffers = [self.qaicrt.QBuffer(bytes(binding.size)) for binding in self.bindings] - self.buf_dims = self.qaicrt.BufferDimensionsVecRef( + self.qbuffers = [qaicrt.QBuffer(bytes(binding.size)) for binding in self.bindings] + self.buf_dims = qaicrt.BufferDimensionsVecRef( [(self.aic_to_np_dtype_mapping[binding.type].itemsize, list(binding.dims)) for binding in self.bindings] ) @property def input_names(self) -> List[str]: - return [binding.name for binding in self.bindings if binding.dir == self.aicapi.BUFFER_IO_TYPE_INPUT] + return [binding.name for binding in self.bindings if binding.dir == aicapi.BUFFER_IO_TYPE_INPUT] @property def output_names(self) -> List[str]: - return [binding.name for binding in self.bindings if binding.dir == self.aicapi.BUFFER_IO_TYPE_OUTPUT] + return [binding.name for binding in self.bindings if binding.dir == aicapi.BUFFER_IO_TYPE_OUTPUT] def activate(self): """Activate qpc""" self.program.activate() - self.execObj = self.qaicrt.ExecObj(self.context, self.program) + self.execObj = qaicrt.ExecObj(self.context, self.program) def deactivate(self): """Deactivate qpc""" @@ -142,7 +147,7 @@ def set_buffers(self, buffers: Dict[str, np.ndarray]): warn(f'Buffer: "{buffer_name}" not found') continue buffer_index = self.binding_index_map[buffer_name] - self.qbuffers[buffer_index] = self.qaicrt.QBuffer(buffer.tobytes()) + self.qbuffers[buffer_index] = qaicrt.QBuffer(buffer.tobytes()) self.buf_dims[buffer_index] = ( buffer.itemsize, buffer.shape if len(buffer.shape) > 0 else (1,), @@ -170,13 +175,13 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: """ self.set_buffers(inputs) - if self.execObj.setData(self.qbuffers, self.buf_dims) != self.qaicrt.QStatus.QS_SUCCESS: + if self.execObj.setData(self.qbuffers, self.buf_dims) != qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to setData") - if self.queue.enqueue(self.execObj) != self.qaicrt.QStatus.QS_SUCCESS: + if self.queue.enqueue(self.execObj) != qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to enqueue") - if self.execObj.waitForCompletion() != self.qaicrt.QStatus.QS_SUCCESS: + if self.execObj.waitForCompletion() != qaicrt.QStatus.QS_SUCCESS: error_message = "Failed to run" if self.allowed_shapes: @@ -200,7 +205,7 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: raise ValueError(error_message) status, output_qbuffers = self.execObj.getData() - if status != self.qaicrt.QStatus.QS_SUCCESS: + if status != qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to getData") outputs = {} From d2439f285e4d9f1f040a191e59e4570ab396915e Mon Sep 17 00:00:00 2001 From: Sharvari Medhe Date: Wed, 3 Sep 2025 15:45:17 +0000 Subject: [PATCH 4/7] fixed formatting Signed-off-by: Sharvari Medhe --- QEfficient/generation/cloud_infer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py index 8c9db267e..d01cbecd8 100644 --- a/QEfficient/generation/cloud_infer.py +++ b/QEfficient/generation/cloud_infer.py @@ -5,21 +5,23 @@ # # ----------------------------------------------------------------------------- -import importlib import platform import sys from pathlib import Path from typing import Dict, List, Optional, Union -from warnings import warn +from warnings import warn + import numpy as np try: import qaicrt + is_qaicrt_imported = True except ImportError: try: sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}") import qaicrt + is_qaicrt_imported = True except ImportError: is_qaicrt_imported = False @@ -33,8 +35,8 @@ except ImportError: is_aicapi_imported = False -class QAICInferenceSession: +class QAICInferenceSession: def __init__( self, qpc_path: Union[Path, str], @@ -51,9 +53,7 @@ def __init__( :param enable_debug_logs: If True, enable debug logs. Default=False. """ if not is_qaicrt_imported and not is_aicapi_imported: - raise ImportError( - "QAIC runtime not available. Please install QAIC SDK" - ) + raise ImportError("QAIC runtime not available. Please install QAIC SDK") # Build dtype mapping once (depends on aicapi constants) self.aic_to_np_dtype_mapping = { aicapi.FLOAT_TYPE: np.dtype(np.float32), From 1a35521c126d080c9bb2915255ba5bb2bbf9bb08 Mon Sep 17 00:00:00 2001 From: Sharvari Medhe Date: Fri, 5 Sep 2025 05:24:00 +0000 Subject: [PATCH 5/7] added flags to check import Signed-off-by: Sharvari Medhe --- QEfficient/generation/cloud_infer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py index d01cbecd8..08077fa30 100644 --- a/QEfficient/generation/cloud_infer.py +++ b/QEfficient/generation/cloud_infer.py @@ -28,12 +28,16 @@ try: import QAicApi_pb2 as aicapi + + is_aicapi_imported = True except ImportError: try: sys.path.append("/opt/qti-aic/dev/python") import QAicApi_pb2 as aicapi + + is_aicapi_imported = True except ImportError: - is_aicapi_imported = False + is_qaicrt_imported = False class QAICInferenceSession: @@ -52,7 +56,7 @@ def __init__( :param activate: If False, activation will be skipped. Default=True. :param enable_debug_logs: If True, enable debug logs. Default=False. """ - if not is_qaicrt_imported and not is_aicapi_imported: + if not (is_qaicrt_imported and is_aicapi_imported): raise ImportError("QAIC runtime not available. Please install QAIC SDK") # Build dtype mapping once (depends on aicapi constants) self.aic_to_np_dtype_mapping = { From 0c123578ab334e924e8d192fcf7f908c72956e0a Mon Sep 17 00:00:00 2001 From: Sharvari Medhe Date: Mon, 8 Sep 2025 09:07:45 +0000 Subject: [PATCH 6/7] added verbose import error when qaic is unavailable Signed-off-by: Sharvari Medhe --- QEfficient/generation/cloud_infer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py index 08077fa30..4d3713328 100644 --- a/QEfficient/generation/cloud_infer.py +++ b/QEfficient/generation/cloud_infer.py @@ -57,7 +57,11 @@ def __init__( :param enable_debug_logs: If True, enable debug logs. Default=False. """ if not (is_qaicrt_imported and is_aicapi_imported): - raise ImportError("QAIC runtime not available. Please install QAIC SDK") + raise ImportError( + "Unable to import `qaicrt` and/or `QAicApi_pb2` libraries required for executing QPC files on the CLOUD AI platform.\n" + "Please ensure that the QAIC platform SDK and apps SDK are installed correctly." + ) + # Build dtype mapping once (depends on aicapi constants) self.aic_to_np_dtype_mapping = { aicapi.FLOAT_TYPE: np.dtype(np.float32), From f2a8afa748fc1261bc7fce55f556526032ef75a3 Mon Sep 17 00:00:00 2001 From: Sharvari Medhe Date: Wed, 10 Sep 2025 10:23:59 +0000 Subject: [PATCH 7/7] adding comments Signed-off-by: Sharvari Medhe --- QEfficient/generation/cloud_infer.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/QEfficient/generation/cloud_infer.py b/QEfficient/generation/cloud_infer.py index 4d3713328..4acc97787 100644 --- a/QEfficient/generation/cloud_infer.py +++ b/QEfficient/generation/cloud_infer.py @@ -181,17 +181,19 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: Return: :Dict[str, np.ndarray]: """ - + # Set inputs self.set_buffers(inputs) if self.execObj.setData(self.qbuffers, self.buf_dims) != qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to setData") - + # # Run with sync API + # if self.execObj.run(self.qbuffers) != qaicrt.QStatus.QS_SUCCESS: + # Run with async API if self.queue.enqueue(self.execObj) != qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to enqueue") if self.execObj.waitForCompletion() != qaicrt.QStatus.QS_SUCCESS: error_message = "Failed to run" - + # Print additional error messages for unmatched dimension error if self.allowed_shapes: error_message += "\n\n(Only if 'No matching dimension found' error is present above)" error_message += "\nAllowed shapes:" @@ -211,11 +213,11 @@ def run(self, inputs: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]: continue error_message += f"{binding.name}:\t{elemsize}\t{shape}\n" raise ValueError(error_message) - + # Get output buffers status, output_qbuffers = self.execObj.getData() if status != qaicrt.QStatus.QS_SUCCESS: raise MemoryError("Failed to getData") - + # Build output outputs = {} for output_name in self.output_names: buffer_index = self.binding_index_map[output_name]