From 713eaa8bc1ae81257d53b01234b666d3565cc131 Mon Sep 17 00:00:00 2001 From: yuyun2000 <15515722313yxw@gmail.com> Date: Thu, 30 Oct 2025 16:37:21 +0800 Subject: [PATCH 1/2] Fix duplicate memory usage when loading model Resolve issue where loading a model creates an extra copy in system memory, causing redundant memory consumption. --- axengine/_axe.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/axengine/_axe.py b/axengine/_axe.py index 16baa5f..6bfb431 100644 --- a/axengine/_axe.py +++ b/axengine/_axe.py @@ -131,13 +131,16 @@ def __init__( self._context = engine_cffi.new("uint64_t **") self._io = engine_cffi.new("AX_ENGINE_IO_T *") - # model buffer, almost copied from onnx runtime + import mmap + if isinstance(path_or_bytes, (str, os.PathLike)): self._model_name = os.path.splitext(os.path.basename(path_or_bytes))[0] with open(path_or_bytes, "rb") as f: - data = f.read() - self._model_buffer = engine_cffi.new("char[]", data) - self._model_buffer_size = len(data) + # Use memory mapping without actually loading into memory + mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + self._model_buffer = engine_cffi.from_buffer("char[]", mmapped_file) + self._model_buffer_size = len(mmapped_file) + self._mmapped_file = mmapped_file # keep a reference so the mapping stays open while the buffer is in use elif isinstance(path_or_bytes, bytes): self._model_buffer = engine_cffi.new("char[]", path_or_bytes) self._model_buffer_size = len(path_or_bytes) From 7f4db12dc5d5ec58fa7bb44f20368222e1eddc75 Mon Sep 17 00:00:00 2001 From: yuyun2000 <15515722313yxw@gmail.com> Date: Thu, 30 Oct 2025 16:40:47 +0800 Subject: [PATCH 2/2] Credit yuyun2000 in README for the model-loading memory fix Add an acknowledgement entry for the fix that stops model loading from creating an extra copy of the model in system memory. 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ebb51bb..c096521 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ root@ax650:~/samples# python3 classification.py -m /opt/data/npu/models/mobilene - [zylo117](https://github.com/zylo117): 提供了基于 cffi 的 AXCL Runtime Python API 实现 - [nnn](https://github.com/nnn112358),[HongJie Li](https://github.com/techshoww) 和 [Shinichi Tanaka](https://github.com/s1tnk) 报告 cffi 的使用问题,[Shinichi Tanaka](https://github.com/s1tnk) 提供了解决方案 - +- [yuyun](https://github.com/yuyun2000): 修复了加载模型时重复占用系统内存的 bug ## 关联项目