diff --git a/README.md b/README.md
index ebb51bb..c096521 100644
--- a/README.md
+++ b/README.md
@@ -108,7 +108,7 @@ root@ax650:~/samples# python3 classification.py -m /opt/data/npu/models/mobilene
 
 - [zylo117](https://github.com/zylo117): 提供了基于 cffi 的 AXCL Runtime Python API 实现
 - [nnn](https://github.com/nnn112358),[HongJie Li](https://github.com/techshoww) 和 [Shinichi Tanaka](https://github.com/s1tnk) 报告 cffi 的使用问题,[Shinichi Tanaka](https://github.com/s1tnk) 提供了解决方案
-
+- [yuyun](https://github.com/yuyun2000): 修复了加载模型时会在系统内存重复占用内存的bug
 
 ## 关联项目
 
diff --git a/axengine/_axe.py b/axengine/_axe.py
index 16baa5f..6bfb431 100644
--- a/axengine/_axe.py
+++ b/axengine/_axe.py
@@ -131,13 +131,16 @@ def __init__(
         self._context = engine_cffi.new("uint64_t **")
         self._io = engine_cffi.new("AX_ENGINE_IO_T *")
 
-        # model buffer, almost copied from onnx runtime
+        import mmap
+
         if isinstance(path_or_bytes, (str, os.PathLike)):
             self._model_name = os.path.splitext(os.path.basename(path_or_bytes))[0]
             with open(path_or_bytes, "rb") as f:
-                data = f.read()
-                self._model_buffer = engine_cffi.new("char[]", data)
-                self._model_buffer_size = len(data)
+                # Use memory mapping without actually loading into memory
+                mmapped_file = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
+                self._model_buffer = engine_cffi.from_buffer("char[]", mmapped_file)
+                self._model_buffer_size = len(mmapped_file)
+                self._mmapped_file = mmapped_file  # keep
         elif isinstance(path_or_bytes, bytes):
             self._model_buffer = engine_cffi.new("char[]", path_or_bytes)
             self._model_buffer_size = len(path_or_bytes)