From cfac59e21a7ca2196761fc8d961d972ba9c6b200 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Tue, 17 Feb 2026 09:23:05 +0000 Subject: [PATCH 01/13] feat(mllm_kernel): simplify JIT usage in README and update kernel example - Replaced the previous JIT utility functions with a streamlined `jit` decorator for kernel registration. - Updated the README.md to reflect the new recommended pattern for CPU kernel implementation. - Simplified the example for using the JIT compilation with a focus on clarity and ease of use. --- mllm-kernel/README.md | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/mllm-kernel/README.md b/mllm-kernel/README.md index 14c8118f..0a458049 100644 --- a/mllm-kernel/README.md +++ b/mllm-kernel/README.md @@ -80,31 +80,30 @@ y = add_constant(x, 8) Use the helpers in `mllm_kernel.jit_utils`: -- `load_cpu_jit` -- `load_cuda_jit` +- `jit` - `make_cpp_args` -- `cache_once` -Example pattern: +Recommended pattern (CPU example): ```python import torch -from mllm_kernel.jit_utils import cache_once, load_cpu_jit, make_cpp_args - -@cache_once -def _jit_my_kernel_module(param: int): - args = make_cpp_args(param) - return load_cpu_jit( - "my_kernel", - *args, - cpp_files=["my_kernel.cpp"], - cpp_wrappers=[("my_kernel", f"my_namespace::my_kernel<{args}>")], - ) +import mllm_kernel + +@mllm_kernel.jit( + args=16, + device="cpu", + cpp_files=["my_kernel.cpp"], + cpp_wrappers=[("my_kernel", "my_namespace::my_kernel<16>")], + func_name="my_kernel", +) +def _my_kernel_16(compiled_module, dst: torch.Tensor, src: torch.Tensor) -> None: + compiled_module.my_kernel(dst, src) def my_kernel(src: torch.Tensor, param: int) -> torch.Tensor: + if param != 16: + raise ValueError("This demo only supports param=16.") dst = torch.empty_like(src) - module = _jit_my_kernel_module(param) - module.my_kernel(dst, src) + _my_kernel_16(dst, src) return dst ``` From 8f3485a3bb7a7e8e73066d993b312128130ed8ef Mon Sep 17 
00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 18 Feb 2026 03:39:52 +0000 Subject: [PATCH 02/13] feat: update dependencies and refactor mobile module structure - Updated `apache-tvm-ffi` version to `0.1.8` in `pyproject.toml` and `mllm-kernel/pyproject.toml`. - Refactored mobile module imports and structure, moving scripts to `pymllm.mobile` and removing unused backends. - Introduced new classes and methods for quantization and model deployment in the Qualcomm backend. - Added new README files for mobile and Qualcomm transformer components. --- mllm-kernel/pyproject.toml | 2 +- mllm/ffi/Extension.cc | 5 +- mllm/ffi/vendors/tvm-ffi | 2 +- pymllm/__init__.py | 72 ++++++++----------- .../cuda/__init__.py => __main__.py} | 0 pymllm/backends/__init__.py | 4 -- pymllm/backends/cuda/tilelang_compile_test.py | 41 ----------- .../transformers/core => layers}/__init__.py | 0 pymllm/mobile/README.md | 3 +- pymllm/mobile/__init__.py | 45 ++++++++++++ .../spinquant => mobile/backends}/__init__.py | 2 + .../{ => mobile}/backends/qualcomm/README.md | 0 .../backends/qualcomm/__init__.py | 0 pymllm/{ => mobile}/backends/qualcomm/nn.py | 2 +- .../backends/qualcomm/qnn_aot_env.py | 4 +- .../backends/qualcomm/transformers/.gitignore | 0 .../backends/qualcomm/transformers/README.md | 0 .../qualcomm/transformers/__init__.py | 0 .../qualcomm/transformers/core}/__init__.py | 0 .../qualcomm/transformers/core/embedding.py | 0 .../qualcomm/transformers/core/observer.py | 0 .../qualcomm/transformers/core/qdq.py | 0 .../qualcomm/transformers/core/qlinear.py | 2 +- .../qualcomm/transformers/core/rms_norm.py | 0 .../transformers/llama/modeling_llama.py | 10 +-- .../qualcomm/transformers/llama/runner.py | 12 ++-- .../qualcomm/transformers/llama/train.py | 2 +- .../transformers/qwen2/modeling_qwen2.py | 10 +-- .../qualcomm/transformers/qwen2/runner.py | 12 ++-- .../qualcomm/transformers/qwen2/train.py | 2 +- .../transformers/qwen3/modeling_qwen3.py | 10 +-- 
.../qualcomm/transformers/qwen3/runner.py | 12 ++-- .../qualcomm/transformers/qwen3/train.py | 2 +- pymllm/{ => mobile}/convertor/__init__.py | 0 .../convertor/mllm_type_mapping.py | 0 .../{ => mobile}/convertor/model_file_v1.py | 0 .../{ => mobile}/convertor/model_file_v2.py | 0 pymllm/{ => mobile}/ffi/__init__.py | 0 pymllm/{ => mobile}/ffi/_ffi_api.py | 0 pymllm/{ => mobile}/ffi/base.py | 2 +- pymllm/{ => mobile}/nn/__init__.py | 0 pymllm/{ => mobile}/nn/_layers.py | 0 pymllm/{ => mobile}/nn/_module.py | 0 pymllm/{ => mobile}/nn/functional.py | 0 pymllm/{ => mobile}/quantize/__init__.py | 0 .../{ => mobile}/quantize/cast2fp32_pass.py | 0 .../quantize/gguf}/__init__.py | 0 pymllm/{ => mobile}/quantize/kai/__init__.py | 0 pymllm/{ => mobile}/quantize/kai/w4a32.py | 0 pymllm/{ => mobile}/quantize/pipeline.py | 0 pymllm/{ => mobile}/quantize/quantize_pass.py | 0 pymllm/{ => mobile}/quantize/solver.py | 0 .../quantize/spinquant}/__init__.py | 0 pymllm/{ => mobile}/service/__init__.py | 0 pymllm/{ => mobile}/service/models_hub.py | 0 pymllm/{ => mobile}/service/network.py | 0 pymllm/{ => mobile}/service/rr_process.py | 0 pymllm/{ => mobile}/service/tools.py | 0 .../tests/qualcomm/test_context_create.py | 4 +- pymllm/{ => mobile}/tests/test_nn.py | 4 +- pymllm/{ => mobile}/tests/test_tensor.py | 2 +- pymllm/{ => mobile}/utils/__init__.py | 0 pymllm/{ => mobile}/utils/adb.py | 0 pymllm/{ => mobile}/utils/error_handler.py | 0 pymllm/{ => mobile}/utils/mllm_convertor.py | 0 .../mllm_ir/trace.py => models/__init__.py} | 0 pymllm/utils/mllm_convertor_server/service.py | 2 - pyproject.toml | 8 +-- 68 files changed, 132 insertions(+), 146 deletions(-) rename pymllm/{backends/cuda/__init__.py => __main__.py} (100%) delete mode 100644 pymllm/backends/__init__.py delete mode 100644 pymllm/backends/cuda/tilelang_compile_test.py rename pymllm/{backends/qualcomm/transformers/core => layers}/__init__.py (100%) create mode 100644 pymllm/mobile/__init__.py rename 
pymllm/{quantize/spinquant => mobile/backends}/__init__.py (71%) rename pymllm/{ => mobile}/backends/qualcomm/README.md (100%) rename pymllm/{ => mobile}/backends/qualcomm/__init__.py (100%) rename pymllm/{ => mobile}/backends/qualcomm/nn.py (75%) rename pymllm/{ => mobile}/backends/qualcomm/qnn_aot_env.py (83%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/.gitignore (100%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/README.md (100%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/__init__.py (100%) rename pymllm/{compile/mlir => mobile/backends/qualcomm/transformers/core}/__init__.py (100%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/core/embedding.py (100%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/core/observer.py (100%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/core/qdq.py (100%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/core/qlinear.py (99%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/core/rms_norm.py (100%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/llama/modeling_llama.py (98%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/llama/runner.py (96%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/llama/train.py (94%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/qwen2/modeling_qwen2.py (98%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/qwen2/runner.py (96%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/qwen2/train.py (94%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/qwen3/modeling_qwen3.py (98%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/qwen3/runner.py (96%) rename pymllm/{ => mobile}/backends/qualcomm/transformers/qwen3/train.py (94%) rename pymllm/{ => mobile}/convertor/__init__.py (100%) rename pymllm/{ => mobile}/convertor/mllm_type_mapping.py (100%) rename pymllm/{ => mobile}/convertor/model_file_v1.py (100%) rename pymllm/{ => 
mobile}/convertor/model_file_v2.py (100%) rename pymllm/{ => mobile}/ffi/__init__.py (100%) rename pymllm/{ => mobile}/ffi/_ffi_api.py (100%) rename pymllm/{ => mobile}/ffi/base.py (90%) rename pymllm/{ => mobile}/nn/__init__.py (100%) rename pymllm/{ => mobile}/nn/_layers.py (100%) rename pymllm/{ => mobile}/nn/_module.py (100%) rename pymllm/{ => mobile}/nn/functional.py (100%) rename pymllm/{ => mobile}/quantize/__init__.py (100%) rename pymllm/{ => mobile}/quantize/cast2fp32_pass.py (100%) rename pymllm/{compile => mobile/quantize/gguf}/__init__.py (100%) rename pymllm/{ => mobile}/quantize/kai/__init__.py (100%) rename pymllm/{ => mobile}/quantize/kai/w4a32.py (100%) rename pymllm/{ => mobile}/quantize/pipeline.py (100%) rename pymllm/{ => mobile}/quantize/quantize_pass.py (100%) rename pymllm/{ => mobile}/quantize/solver.py (100%) rename pymllm/{quantize/gguf => mobile/quantize/spinquant}/__init__.py (100%) rename pymllm/{ => mobile}/service/__init__.py (100%) rename pymllm/{ => mobile}/service/models_hub.py (100%) rename pymllm/{ => mobile}/service/network.py (100%) rename pymllm/{ => mobile}/service/rr_process.py (100%) rename pymllm/{ => mobile}/service/tools.py (100%) rename pymllm/{ => mobile}/tests/qualcomm/test_context_create.py (89%) rename pymllm/{ => mobile}/tests/test_nn.py (83%) rename pymllm/{ => mobile}/tests/test_tensor.py (89%) rename pymllm/{ => mobile}/utils/__init__.py (100%) rename pymllm/{ => mobile}/utils/adb.py (100%) rename pymllm/{ => mobile}/utils/error_handler.py (100%) rename pymllm/{ => mobile}/utils/mllm_convertor.py (100%) rename pymllm/{compile/mllm_ir/trace.py => models/__init__.py} (100%) delete mode 100644 pymllm/utils/mllm_convertor_server/service.py diff --git a/mllm-kernel/pyproject.toml b/mllm-kernel/pyproject.toml index f64e1306..5fe07eea 100644 --- a/mllm-kernel/pyproject.toml +++ b/mllm-kernel/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "packaging", "torch", "torch-c-dlpack-ext", - "apache-tvm-ffi", + 
"apache-tvm-ffi == 0.1.8", ] [project.optional-dependencies] diff --git a/mllm/ffi/Extension.cc b/mllm/ffi/Extension.cc index cb999191..f3f2d248 100644 --- a/mllm/ffi/Extension.cc +++ b/mllm/ffi/Extension.cc @@ -83,12 +83,12 @@ TVM_FFI_STATIC_INIT_BLOCK() { // Tensor related refl::GlobalDef().def("mllm.empty", mllm::ffi::empty); refl::GlobalDef().def("mllm.from_torch", [](const tvm::ffi::Tensor& t) -> mllm::ffi::Tensor { - auto dl_pack = t.get()->ToDLPack(); + auto dl_pack = t.ToDLPack(); return ::mllm::ffi::Tensor(mllm::ffi::__from_dlpack(dl_pack)); }); refl::GlobalDef().def("mllm.from_numpy", [](const tvm::ffi::Tensor& t) -> mllm::ffi::Tensor { - auto dl_pack = t.get()->ToDLPack(); + auto dl_pack = t.ToDLPack(); return ::mllm::ffi::Tensor(mllm::ffi::__from_dlpack(dl_pack)); }); @@ -345,6 +345,7 @@ TVM_FFI_STATIC_INIT_BLOCK() { namespace refl = tvm::ffi::reflection; refl::ObjectDef<::mllm::ffi::BaseOpObj>(); + refl::ObjectDef<::mllm::ffi::ParameterFileObj>(); refl::GlobalDef().def("mllm.BaseOp.load", [](const mllm::ffi::BaseOp& self, const mllm::ffi::ParameterFile& obj) -> void { self.get()->op_ptr_->load(obj.get()->pf_ptr_); }); diff --git a/mllm/ffi/vendors/tvm-ffi b/mllm/ffi/vendors/tvm-ffi index 46f73580..dcd07cfe 160000 --- a/mllm/ffi/vendors/tvm-ffi +++ b/mllm/ffi/vendors/tvm-ffi @@ -1 +1 @@ -Subproject commit 46f73580780f2973e6ea3afb6d3a9d6f6ffd02cc +Subproject commit dcd07cfe27465287ee5b203b742e85dcfb99606a diff --git a/pymllm/__init__.py b/pymllm/__init__.py index 1bd31cd6..3f2488d2 100644 --- a/pymllm/__init__.py +++ b/pymllm/__init__.py @@ -2,48 +2,32 @@ # Licensed under the MIT License. from __future__ import annotations +import os +import sys -from . import ffi -from . import convertor -from . import utils -from . import quantize -from . import nn -from . import compile -from . import service -from . 
import backends -from .ffi import ( - # Floating point types - float32, - float16, - bfloat16, - # Signed integer types - int8, - int16, - int32, - int64, - # Unsigned integer types - uint8, - uint16, - uint32, - uint64, - # Bool type - boolean, - # Devices - cpu, - cuda, - qnn, - # Tensor and utilities - Tensor, - empty, - echo, - device, - is_torch_available, - is_numpy_available, - from_torch, - from_numpy, - zeros, - ones, - arange, - random, -) -from .nn.functional import matmul +__all__ = [] + + +def _has_mobile_libs() -> bool: + parent_dir = os.path.dirname(os.path.realpath(__file__)) + + # Platform-specific library names + if sys.platform.startswith("win32"): + lib_name = "MllmFFIExtension.dll" + elif sys.platform.startswith("darwin"): + lib_name = "MllmFFIExtension.dylib" + else: + lib_name = "MllmFFIExtension.so" + + lib_path = os.path.join(parent_dir, "lib", lib_name) + return os.path.exists(lib_path) + + +def is_mobile_available() -> bool: + return _has_mobile_libs() + + +if _has_mobile_libs(): + from . import mobile + + __all__.append("mobile") diff --git a/pymllm/backends/cuda/__init__.py b/pymllm/__main__.py similarity index 100% rename from pymllm/backends/cuda/__init__.py rename to pymllm/__main__.py diff --git a/pymllm/backends/__init__.py b/pymllm/backends/__init__.py deleted file mode 100644 index 5e926d58..00000000 --- a/pymllm/backends/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) MLLM Team. -# Licensed under the MIT License. - -from . 
import cuda, qualcomm diff --git a/pymllm/backends/cuda/tilelang_compile_test.py b/pymllm/backends/cuda/tilelang_compile_test.py deleted file mode 100644 index 65a2e007..00000000 --- a/pymllm/backends/cuda/tilelang_compile_test.py +++ /dev/null @@ -1,41 +0,0 @@ -import tilelang -import tilelang.language as T - - -@tilelang.jit( - out_idx=[-1], compile_flags=["-O3", "--use_fast_math", "--expt-relaxed-constexpr"] -) -def elementwise_add(M, N, block_M, block_N, in_dtype, out_dtype, threads): - @T.prim_func - def elem_add( - A: T.Tensor((M, N), in_dtype), - B: T.Tensor((M, N), in_dtype), - C: T.Tensor((M, N), out_dtype), - ): - with T.Kernel( - T.ceildiv(N, block_N), T.ceildiv(M, block_M), threads=threads - ) as (bx, by): - A_shared = T.alloc_shared((block_M, block_N), in_dtype) - B_shared = T.alloc_shared((block_M, block_N), in_dtype) - C_local = T.alloc_fragment((block_M, block_N), out_dtype) - C_shared = T.alloc_shared((block_M, block_N), out_dtype) - - T.copy(A[by * block_M, bx * block_N], A_shared) - T.copy(B[by * block_M, bx * block_N], B_shared) - for local_y, local_x in T.Parallel(block_M, block_N): - C_local[local_y, local_x] = ( - A_shared[local_y, local_x] + B_shared[local_y, local_x] - ) - T.copy(C_local, C_shared) - T.copy(C_shared, C[by * block_M, bx * block_N]) - - return elem_add - - -def compile_test(): - M = 1024 - N = 1024 - config = {"block_M": 128, "block_N": 128, "threads": 128} - kernel = elementwise_add(M, N, **config, in_dtype="float16", out_dtype="float16") - source = kernel.get_kernel_source() - print(source) diff --git a/pymllm/backends/qualcomm/transformers/core/__init__.py b/pymllm/layers/__init__.py similarity index 100% rename from pymllm/backends/qualcomm/transformers/core/__init__.py rename to pymllm/layers/__init__.py diff --git a/pymllm/mobile/README.md b/pymllm/mobile/README.md index 29877ea0..ceb71a5d 100644 --- a/pymllm/mobile/README.md +++ b/pymllm/mobile/README.md @@ -1 +1,2 @@ -We should refactor current pymllm's src to mobile 
directory. And provide more functionalities for torch based VLA. +# Pymllm mobile + diff --git a/pymllm/mobile/__init__.py b/pymllm/mobile/__init__.py new file mode 100644 index 00000000..8796bbea --- /dev/null +++ b/pymllm/mobile/__init__.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from . import ffi +from . import convertor +from . import utils +from . import quantize +from . import nn +from . import service +from . import backends +from .ffi import ( + # Floating point types + float32, + float16, + bfloat16, + # Signed integer types + int8, + int16, + int32, + int64, + # Unsigned integer types + uint8, + uint16, + uint32, + uint64, + # Bool type + boolean, + # Devices + cpu, + cuda, + qnn, + # Tensor and utilities + Tensor, + empty, + echo, + device, + is_torch_available, + is_numpy_available, + from_torch, + from_numpy, + zeros, + ones, + arange, + random, +) +from .nn.functional import matmul diff --git a/pymllm/quantize/spinquant/__init__.py b/pymllm/mobile/backends/__init__.py similarity index 71% rename from pymllm/quantize/spinquant/__init__.py rename to pymllm/mobile/backends/__init__.py index ea8e2bec..1578a0d8 100644 --- a/pymllm/quantize/spinquant/__init__.py +++ b/pymllm/mobile/backends/__init__.py @@ -1,2 +1,4 @@ # Copyright (c) MLLM Team. # Licensed under the MIT License. + +from . 
import qualcomm diff --git a/pymllm/backends/qualcomm/README.md b/pymllm/mobile/backends/qualcomm/README.md similarity index 100% rename from pymllm/backends/qualcomm/README.md rename to pymllm/mobile/backends/qualcomm/README.md diff --git a/pymllm/backends/qualcomm/__init__.py b/pymllm/mobile/backends/qualcomm/__init__.py similarity index 100% rename from pymllm/backends/qualcomm/__init__.py rename to pymllm/mobile/backends/qualcomm/__init__.py diff --git a/pymllm/backends/qualcomm/nn.py b/pymllm/mobile/backends/qualcomm/nn.py similarity index 75% rename from pymllm/backends/qualcomm/nn.py rename to pymllm/mobile/backends/qualcomm/nn.py index 0ba9aef5..e4bc91ac 100644 --- a/pymllm/backends/qualcomm/nn.py +++ b/pymllm/mobile/backends/qualcomm/nn.py @@ -1,4 +1,4 @@ -from pymllm.nn._layers import Softmax, RoPE +from pymllm.mobile.nn._layers import Softmax, RoPE class QnnSoftmax(Softmax): diff --git a/pymllm/backends/qualcomm/qnn_aot_env.py b/pymllm/mobile/backends/qualcomm/qnn_aot_env.py similarity index 83% rename from pymllm/backends/qualcomm/qnn_aot_env.py rename to pymllm/mobile/backends/qualcomm/qnn_aot_env.py index 8b0c0d2e..bc48c7c9 100644 --- a/pymllm/backends/qualcomm/qnn_aot_env.py +++ b/pymllm/mobile/backends/qualcomm/qnn_aot_env.py @@ -1,7 +1,7 @@ -from pymllm.ffi import is_qnn_aot_on_x86_enabled +from pymllm.mobile.ffi import is_qnn_aot_on_x86_enabled if is_qnn_aot_on_x86_enabled(): - from pymllm.ffi import ( + from pymllm.mobile.ffi import ( QnnDeviceAndContext, QnnAOTEnv, QcomChipset, diff --git a/pymllm/backends/qualcomm/transformers/.gitignore b/pymllm/mobile/backends/qualcomm/transformers/.gitignore similarity index 100% rename from pymllm/backends/qualcomm/transformers/.gitignore rename to pymllm/mobile/backends/qualcomm/transformers/.gitignore diff --git a/pymllm/backends/qualcomm/transformers/README.md b/pymllm/mobile/backends/qualcomm/transformers/README.md similarity index 100% rename from pymllm/backends/qualcomm/transformers/README.md rename 
to pymllm/mobile/backends/qualcomm/transformers/README.md diff --git a/pymllm/backends/qualcomm/transformers/__init__.py b/pymllm/mobile/backends/qualcomm/transformers/__init__.py similarity index 100% rename from pymllm/backends/qualcomm/transformers/__init__.py rename to pymllm/mobile/backends/qualcomm/transformers/__init__.py diff --git a/pymllm/compile/mlir/__init__.py b/pymllm/mobile/backends/qualcomm/transformers/core/__init__.py similarity index 100% rename from pymllm/compile/mlir/__init__.py rename to pymllm/mobile/backends/qualcomm/transformers/core/__init__.py diff --git a/pymllm/backends/qualcomm/transformers/core/embedding.py b/pymllm/mobile/backends/qualcomm/transformers/core/embedding.py similarity index 100% rename from pymllm/backends/qualcomm/transformers/core/embedding.py rename to pymllm/mobile/backends/qualcomm/transformers/core/embedding.py diff --git a/pymllm/backends/qualcomm/transformers/core/observer.py b/pymllm/mobile/backends/qualcomm/transformers/core/observer.py similarity index 100% rename from pymllm/backends/qualcomm/transformers/core/observer.py rename to pymllm/mobile/backends/qualcomm/transformers/core/observer.py diff --git a/pymllm/backends/qualcomm/transformers/core/qdq.py b/pymllm/mobile/backends/qualcomm/transformers/core/qdq.py similarity index 100% rename from pymllm/backends/qualcomm/transformers/core/qdq.py rename to pymllm/mobile/backends/qualcomm/transformers/core/qdq.py diff --git a/pymllm/backends/qualcomm/transformers/core/qlinear.py b/pymllm/mobile/backends/qualcomm/transformers/core/qlinear.py similarity index 99% rename from pymllm/backends/qualcomm/transformers/core/qlinear.py rename to pymllm/mobile/backends/qualcomm/transformers/core/qlinear.py index 9e90ba8a..35439180 100644 --- a/pymllm/backends/qualcomm/transformers/core/qlinear.py +++ b/pymllm/mobile/backends/qualcomm/transformers/core/qlinear.py @@ -2,7 +2,7 @@ import torch.nn as nn import torch.nn.functional as F from torch.ao.quantization import 
FakeQuantize, PerChannelMinMaxObserver -from pymllm.backends.qualcomm.transformers.core.observer import ( +from pymllm.mobile.backends.qualcomm.transformers.core.observer import ( PerBlockParamFakeQuantize, ) from torchao.quantization.quant_primitives import ( diff --git a/pymllm/backends/qualcomm/transformers/core/rms_norm.py b/pymllm/mobile/backends/qualcomm/transformers/core/rms_norm.py similarity index 100% rename from pymllm/backends/qualcomm/transformers/core/rms_norm.py rename to pymllm/mobile/backends/qualcomm/transformers/core/rms_norm.py diff --git a/pymllm/backends/qualcomm/transformers/llama/modeling_llama.py b/pymllm/mobile/backends/qualcomm/transformers/llama/modeling_llama.py similarity index 98% rename from pymllm/backends/qualcomm/transformers/llama/modeling_llama.py rename to pymllm/mobile/backends/qualcomm/transformers/llama/modeling_llama.py index 119ec04b..6b65f34b 100644 --- a/pymllm/backends/qualcomm/transformers/llama/modeling_llama.py +++ b/pymllm/mobile/backends/qualcomm/transformers/llama/modeling_llama.py @@ -52,16 +52,16 @@ from transformers.models.llama.configuration_llama import LlamaConfig # Replace linear, rms_norm with: -from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm -from pymllm.backends.qualcomm.transformers.core.qlinear import ( +from pymllm.mobile.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.mobile.backends.qualcomm.transformers.core.qlinear import ( QLinearLPBQ, ) -from pymllm.backends.qualcomm.transformers.core.qdq import ( +from pymllm.mobile.backends.qualcomm.transformers.core.qdq import ( ActivationQDQ, FixedActivationQDQ, ) -from pymllm.backends.qualcomm.transformers.core.embedding import QEmbedding -from pymllm.backends.qualcomm.transformers.core.observer import ConcatObserver +from pymllm.mobile.backends.qualcomm.transformers.core.embedding import QEmbedding +from pymllm.mobile.backends.qualcomm.transformers.core.observer import ConcatObserver logger = 
logging.get_logger(__name__) diff --git a/pymllm/backends/qualcomm/transformers/llama/runner.py b/pymllm/mobile/backends/qualcomm/transformers/llama/runner.py similarity index 96% rename from pymllm/backends/qualcomm/transformers/llama/runner.py rename to pymllm/mobile/backends/qualcomm/transformers/llama/runner.py index 8aa4627b..730147d0 100644 --- a/pymllm/backends/qualcomm/transformers/llama/runner.py +++ b/pymllm/mobile/backends/qualcomm/transformers/llama/runner.py @@ -2,18 +2,18 @@ from tqdm import tqdm from modelscope.msdatasets import MsDataset from transformers import AutoTokenizer -from pymllm.backends.qualcomm.transformers.core.qdq import ( +from pymllm.mobile.backends.qualcomm.transformers.core.qdq import ( ActivationQDQ, FixedActivationQDQ, ) -from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm -from pymllm.backends.qualcomm.transformers.core.qlinear import ( +from pymllm.mobile.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.mobile.backends.qualcomm.transformers.core.qlinear import ( QLinearLPBQ, QLinearW8A16_PerChannelSym, ) -from pymllm.backends.qualcomm.transformers.core.embedding import QEmbedding -from pymllm.backends.qualcomm.transformers.llama.modeling_llama import LlamaForCausalLM -from pymllm.backends.qualcomm.transformers.core.observer import ConcatObserver +from pymllm.mobile.backends.qualcomm.transformers.core.embedding import QEmbedding +from pymllm.mobile.backends.qualcomm.transformers.llama.modeling_llama import LlamaForCausalLM +from pymllm.mobile.backends.qualcomm.transformers.core.observer import ConcatObserver def recompute_scale_zp(module): diff --git a/pymllm/backends/qualcomm/transformers/llama/train.py b/pymllm/mobile/backends/qualcomm/transformers/llama/train.py similarity index 94% rename from pymllm/backends/qualcomm/transformers/llama/train.py rename to pymllm/mobile/backends/qualcomm/transformers/llama/train.py index cd10befb..41ffc0e2 100644 --- 
a/pymllm/backends/qualcomm/transformers/llama/train.py +++ b/pymllm/mobile/backends/qualcomm/transformers/llama/train.py @@ -2,7 +2,7 @@ import torch import argparse from safetensors.torch import save_model -from pymllm.backends.qualcomm.transformers.llama.runner import LlamaQuantizer +from pymllm.mobile.backends.qualcomm.transformers.llama.runner import LlamaQuantizer def main(): diff --git a/pymllm/backends/qualcomm/transformers/qwen2/modeling_qwen2.py b/pymllm/mobile/backends/qualcomm/transformers/qwen2/modeling_qwen2.py similarity index 98% rename from pymllm/backends/qualcomm/transformers/qwen2/modeling_qwen2.py rename to pymllm/mobile/backends/qualcomm/transformers/qwen2/modeling_qwen2.py index 56b19c42..a43d8b7e 100644 --- a/pymllm/backends/qualcomm/transformers/qwen2/modeling_qwen2.py +++ b/pymllm/mobile/backends/qualcomm/transformers/qwen2/modeling_qwen2.py @@ -31,16 +31,16 @@ from transformers.models.qwen2.configuration_qwen2 import Qwen2Config # Replace linear, rms_norm with: -from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm -from pymllm.backends.qualcomm.transformers.core.qlinear import ( +from pymllm.mobile.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.mobile.backends.qualcomm.transformers.core.qlinear import ( QLinearLPBQ, ) -from pymllm.backends.qualcomm.transformers.core.qdq import ( +from pymllm.mobile.backends.qualcomm.transformers.core.qdq import ( ActivationQDQ, FixedActivationQDQ, ) -from pymllm.backends.qualcomm.transformers.core.embedding import QEmbedding -from pymllm.backends.qualcomm.transformers.core.observer import ConcatObserver +from pymllm.mobile.backends.qualcomm.transformers.core.embedding import QEmbedding +from pymllm.mobile.backends.qualcomm.transformers.core.observer import ConcatObserver class Qwen2MLP(nn.Module): diff --git a/pymllm/backends/qualcomm/transformers/qwen2/runner.py b/pymllm/mobile/backends/qualcomm/transformers/qwen2/runner.py similarity index 96% rename from 
pymllm/backends/qualcomm/transformers/qwen2/runner.py rename to pymllm/mobile/backends/qualcomm/transformers/qwen2/runner.py index d2f5be05..ce55fd06 100644 --- a/pymllm/backends/qualcomm/transformers/qwen2/runner.py +++ b/pymllm/mobile/backends/qualcomm/transformers/qwen2/runner.py @@ -2,18 +2,18 @@ from tqdm import tqdm from modelscope.msdatasets import MsDataset from transformers import AutoTokenizer -from pymllm.backends.qualcomm.transformers.core.qdq import ( +from pymllm.mobile.backends.qualcomm.transformers.core.qdq import ( ActivationQDQ, FixedActivationQDQ, ) -from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm -from pymllm.backends.qualcomm.transformers.core.qlinear import ( +from pymllm.mobile.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.mobile.backends.qualcomm.transformers.core.qlinear import ( QLinearLPBQ, QLinearW8A16_PerChannelSym, ) -from pymllm.backends.qualcomm.transformers.core.embedding import QEmbedding -from pymllm.backends.qualcomm.transformers.qwen2.modeling_qwen2 import Qwen2ForCausalLM -from pymllm.backends.qualcomm.transformers.core.observer import ConcatObserver +from pymllm.mobile.backends.qualcomm.transformers.core.embedding import QEmbedding +from pymllm.mobile.backends.qualcomm.transformers.qwen2.modeling_qwen2 import Qwen2ForCausalLM +from pymllm.mobile.backends.qualcomm.transformers.core.observer import ConcatObserver def recompute_scale_zp(module): diff --git a/pymllm/backends/qualcomm/transformers/qwen2/train.py b/pymllm/mobile/backends/qualcomm/transformers/qwen2/train.py similarity index 94% rename from pymllm/backends/qualcomm/transformers/qwen2/train.py rename to pymllm/mobile/backends/qualcomm/transformers/qwen2/train.py index fec5fdfc..1a8f25ce 100644 --- a/pymllm/backends/qualcomm/transformers/qwen2/train.py +++ b/pymllm/mobile/backends/qualcomm/transformers/qwen2/train.py @@ -2,7 +2,7 @@ import torch import argparse from safetensors.torch import save_model -from 
pymllm.backends.qualcomm.transformers.qwen2.runner import Qwen2Quantizer +from pymllm.mobile.backends.qualcomm.transformers.qwen2.runner import Qwen2Quantizer def main(): diff --git a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py b/pymllm/mobile/backends/qualcomm/transformers/qwen3/modeling_qwen3.py similarity index 98% rename from pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py rename to pymllm/mobile/backends/qualcomm/transformers/qwen3/modeling_qwen3.py index 2dabf5c9..6a8788ba 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/modeling_qwen3.py +++ b/pymllm/mobile/backends/qualcomm/transformers/qwen3/modeling_qwen3.py @@ -46,16 +46,16 @@ from transformers.models.qwen3.configuration_qwen3 import Qwen3Config # Replace linear, rms_norm with: -from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm -from pymllm.backends.qualcomm.transformers.core.qlinear import ( +from pymllm.mobile.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.mobile.backends.qualcomm.transformers.core.qlinear import ( QLinearLPBQ, ) -from pymllm.backends.qualcomm.transformers.core.qdq import ( +from pymllm.mobile.backends.qualcomm.transformers.core.qdq import ( ActivationQDQ, FixedActivationQDQ, ) -from pymllm.backends.qualcomm.transformers.core.embedding import QEmbedding -from pymllm.backends.qualcomm.transformers.core.observer import ConcatObserver +from pymllm.mobile.backends.qualcomm.transformers.core.embedding import QEmbedding +from pymllm.mobile.backends.qualcomm.transformers.core.observer import ConcatObserver class Qwen3MLP(nn.Module): diff --git a/pymllm/backends/qualcomm/transformers/qwen3/runner.py b/pymllm/mobile/backends/qualcomm/transformers/qwen3/runner.py similarity index 96% rename from pymllm/backends/qualcomm/transformers/qwen3/runner.py rename to pymllm/mobile/backends/qualcomm/transformers/qwen3/runner.py index 02ea6a5f..0d7499c9 100644 --- 
a/pymllm/backends/qualcomm/transformers/qwen3/runner.py +++ b/pymllm/mobile/backends/qualcomm/transformers/qwen3/runner.py @@ -2,18 +2,18 @@ from tqdm import tqdm from modelscope.msdatasets import MsDataset from transformers import AutoTokenizer -from pymllm.backends.qualcomm.transformers.core.qdq import ( +from pymllm.mobile.backends.qualcomm.transformers.core.qdq import ( ActivationQDQ, FixedActivationQDQ, ) -from pymllm.backends.qualcomm.transformers.core.rms_norm import QRMSNorm -from pymllm.backends.qualcomm.transformers.core.qlinear import ( +from pymllm.mobile.backends.qualcomm.transformers.core.rms_norm import QRMSNorm +from pymllm.mobile.backends.qualcomm.transformers.core.qlinear import ( QLinearLPBQ, QLinearW8A16_PerChannelSym, ) -from pymllm.backends.qualcomm.transformers.core.embedding import QEmbedding -from pymllm.backends.qualcomm.transformers.qwen3.modeling_qwen3 import Qwen3ForCausalLM -from pymllm.backends.qualcomm.transformers.core.observer import ConcatObserver +from pymllm.mobile.backends.qualcomm.transformers.core.embedding import QEmbedding +from pymllm.mobile.backends.qualcomm.transformers.qwen3.modeling_qwen3 import Qwen3ForCausalLM +from pymllm.mobile.backends.qualcomm.transformers.core.observer import ConcatObserver def recompute_scale_zp(module): diff --git a/pymllm/backends/qualcomm/transformers/qwen3/train.py b/pymllm/mobile/backends/qualcomm/transformers/qwen3/train.py similarity index 94% rename from pymllm/backends/qualcomm/transformers/qwen3/train.py rename to pymllm/mobile/backends/qualcomm/transformers/qwen3/train.py index 63c6d0e8..f44fa67b 100644 --- a/pymllm/backends/qualcomm/transformers/qwen3/train.py +++ b/pymllm/mobile/backends/qualcomm/transformers/qwen3/train.py @@ -2,7 +2,7 @@ import torch import argparse from safetensors.torch import save_model -from pymllm.backends.qualcomm.transformers.qwen3.runner import Qwen3Quantizer +from pymllm.mobile.backends.qualcomm.transformers.qwen3.runner import Qwen3Quantizer def main(): 
diff --git a/pymllm/convertor/__init__.py b/pymllm/mobile/convertor/__init__.py similarity index 100% rename from pymllm/convertor/__init__.py rename to pymllm/mobile/convertor/__init__.py diff --git a/pymllm/convertor/mllm_type_mapping.py b/pymllm/mobile/convertor/mllm_type_mapping.py similarity index 100% rename from pymllm/convertor/mllm_type_mapping.py rename to pymllm/mobile/convertor/mllm_type_mapping.py diff --git a/pymllm/convertor/model_file_v1.py b/pymllm/mobile/convertor/model_file_v1.py similarity index 100% rename from pymllm/convertor/model_file_v1.py rename to pymllm/mobile/convertor/model_file_v1.py diff --git a/pymllm/convertor/model_file_v2.py b/pymllm/mobile/convertor/model_file_v2.py similarity index 100% rename from pymllm/convertor/model_file_v2.py rename to pymllm/mobile/convertor/model_file_v2.py diff --git a/pymllm/ffi/__init__.py b/pymllm/mobile/ffi/__init__.py similarity index 100% rename from pymllm/ffi/__init__.py rename to pymllm/mobile/ffi/__init__.py diff --git a/pymllm/ffi/_ffi_api.py b/pymllm/mobile/ffi/_ffi_api.py similarity index 100% rename from pymllm/ffi/_ffi_api.py rename to pymllm/mobile/ffi/_ffi_api.py diff --git a/pymllm/ffi/base.py b/pymllm/mobile/ffi/base.py similarity index 90% rename from pymllm/ffi/base.py rename to pymllm/mobile/ffi/base.py index 07a01c49..96aed242 100644 --- a/pymllm/ffi/base.py +++ b/pymllm/mobile/ffi/base.py @@ -8,7 +8,7 @@ def _load_lib(): file_dir = os.path.dirname(os.path.realpath(__file__)) - parent_dir = os.path.dirname(file_dir) + parent_dir = os.path.dirname(os.path.dirname(file_dir)) # Platform-specific library names if sys.platform.startswith("win32"): diff --git a/pymllm/nn/__init__.py b/pymllm/mobile/nn/__init__.py similarity index 100% rename from pymllm/nn/__init__.py rename to pymllm/mobile/nn/__init__.py diff --git a/pymllm/nn/_layers.py b/pymllm/mobile/nn/_layers.py similarity index 100% rename from pymllm/nn/_layers.py rename to pymllm/mobile/nn/_layers.py diff --git 
a/pymllm/nn/_module.py b/pymllm/mobile/nn/_module.py similarity index 100% rename from pymllm/nn/_module.py rename to pymllm/mobile/nn/_module.py diff --git a/pymllm/nn/functional.py b/pymllm/mobile/nn/functional.py similarity index 100% rename from pymllm/nn/functional.py rename to pymllm/mobile/nn/functional.py diff --git a/pymllm/quantize/__init__.py b/pymllm/mobile/quantize/__init__.py similarity index 100% rename from pymllm/quantize/__init__.py rename to pymllm/mobile/quantize/__init__.py diff --git a/pymllm/quantize/cast2fp32_pass.py b/pymllm/mobile/quantize/cast2fp32_pass.py similarity index 100% rename from pymllm/quantize/cast2fp32_pass.py rename to pymllm/mobile/quantize/cast2fp32_pass.py diff --git a/pymllm/compile/__init__.py b/pymllm/mobile/quantize/gguf/__init__.py similarity index 100% rename from pymllm/compile/__init__.py rename to pymllm/mobile/quantize/gguf/__init__.py diff --git a/pymllm/quantize/kai/__init__.py b/pymllm/mobile/quantize/kai/__init__.py similarity index 100% rename from pymllm/quantize/kai/__init__.py rename to pymllm/mobile/quantize/kai/__init__.py diff --git a/pymllm/quantize/kai/w4a32.py b/pymllm/mobile/quantize/kai/w4a32.py similarity index 100% rename from pymllm/quantize/kai/w4a32.py rename to pymllm/mobile/quantize/kai/w4a32.py diff --git a/pymllm/quantize/pipeline.py b/pymllm/mobile/quantize/pipeline.py similarity index 100% rename from pymllm/quantize/pipeline.py rename to pymllm/mobile/quantize/pipeline.py diff --git a/pymllm/quantize/quantize_pass.py b/pymllm/mobile/quantize/quantize_pass.py similarity index 100% rename from pymllm/quantize/quantize_pass.py rename to pymllm/mobile/quantize/quantize_pass.py diff --git a/pymllm/quantize/solver.py b/pymllm/mobile/quantize/solver.py similarity index 100% rename from pymllm/quantize/solver.py rename to pymllm/mobile/quantize/solver.py diff --git a/pymllm/quantize/gguf/__init__.py b/pymllm/mobile/quantize/spinquant/__init__.py similarity index 100% rename from 
pymllm/quantize/gguf/__init__.py rename to pymllm/mobile/quantize/spinquant/__init__.py diff --git a/pymllm/service/__init__.py b/pymllm/mobile/service/__init__.py similarity index 100% rename from pymllm/service/__init__.py rename to pymllm/mobile/service/__init__.py diff --git a/pymllm/service/models_hub.py b/pymllm/mobile/service/models_hub.py similarity index 100% rename from pymllm/service/models_hub.py rename to pymllm/mobile/service/models_hub.py diff --git a/pymllm/service/network.py b/pymllm/mobile/service/network.py similarity index 100% rename from pymllm/service/network.py rename to pymllm/mobile/service/network.py diff --git a/pymllm/service/rr_process.py b/pymllm/mobile/service/rr_process.py similarity index 100% rename from pymllm/service/rr_process.py rename to pymllm/mobile/service/rr_process.py diff --git a/pymllm/service/tools.py b/pymllm/mobile/service/tools.py similarity index 100% rename from pymllm/service/tools.py rename to pymllm/mobile/service/tools.py diff --git a/pymllm/tests/qualcomm/test_context_create.py b/pymllm/mobile/tests/qualcomm/test_context_create.py similarity index 89% rename from pymllm/tests/qualcomm/test_context_create.py rename to pymllm/mobile/tests/qualcomm/test_context_create.py index 18983daa..94f42b51 100644 --- a/pymllm/tests/qualcomm/test_context_create.py +++ b/pymllm/mobile/tests/qualcomm/test_context_create.py @@ -1,5 +1,5 @@ -import pymllm as mllm -from pymllm.backends.qualcomm.qnn_aot_env import ( +import pymllm.mobile as mllm +from pymllm.mobile.backends.qualcomm.qnn_aot_env import ( QnnAOTEnv, QnnDeviceAndContext, QcomTryBestPerformance, diff --git a/pymllm/tests/test_nn.py b/pymllm/mobile/tests/test_nn.py similarity index 83% rename from pymllm/tests/test_nn.py rename to pymllm/mobile/tests/test_nn.py index d9a3db2d..403060e9 100644 --- a/pymllm/tests/test_nn.py +++ b/pymllm/mobile/tests/test_nn.py @@ -1,5 +1,5 @@ -import pymllm as mllm -from pymllm import nn +import pymllm.mobile as mllm +from 
pymllm.mobile import nn class FooModule(nn.Module): diff --git a/pymllm/tests/test_tensor.py b/pymllm/mobile/tests/test_tensor.py similarity index 89% rename from pymllm/tests/test_tensor.py rename to pymllm/mobile/tests/test_tensor.py index e935f10b..474e1092 100644 --- a/pymllm/tests/test_tensor.py +++ b/pymllm/mobile/tests/test_tensor.py @@ -1,7 +1,7 @@ # Copyright (c) MLLM Team. # Licensed under the MIT License. -import pymllm as torch +import pymllm.mobile as torch def test_empty_tensor_create() -> bool: diff --git a/pymllm/utils/__init__.py b/pymllm/mobile/utils/__init__.py similarity index 100% rename from pymllm/utils/__init__.py rename to pymllm/mobile/utils/__init__.py diff --git a/pymllm/utils/adb.py b/pymllm/mobile/utils/adb.py similarity index 100% rename from pymllm/utils/adb.py rename to pymllm/mobile/utils/adb.py diff --git a/pymllm/utils/error_handler.py b/pymllm/mobile/utils/error_handler.py similarity index 100% rename from pymllm/utils/error_handler.py rename to pymllm/mobile/utils/error_handler.py diff --git a/pymllm/utils/mllm_convertor.py b/pymllm/mobile/utils/mllm_convertor.py similarity index 100% rename from pymllm/utils/mllm_convertor.py rename to pymllm/mobile/utils/mllm_convertor.py diff --git a/pymllm/compile/mllm_ir/trace.py b/pymllm/models/__init__.py similarity index 100% rename from pymllm/compile/mllm_ir/trace.py rename to pymllm/models/__init__.py diff --git a/pymllm/utils/mllm_convertor_server/service.py b/pymllm/utils/mllm_convertor_server/service.py deleted file mode 100644 index ea8e2bec..00000000 --- a/pymllm/utils/mllm_convertor_server/service.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright (c) MLLM Team. -# Licensed under the MIT License. 
diff --git a/pyproject.toml b/pyproject.toml index 703d4456..efe4a14d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "scikit-build-core>=0.11.0", "apache-tvm-ffi" + "scikit-build-core>=0.11.0", "apache-tvm-ffi == 0.1.8" ] build-backend = "scikit_build_core.build" @@ -21,7 +21,7 @@ dependencies=[ "packaging", "pytest", "pytest-html", - "apache-tvm-ffi == 0.1.0b4", + "apache-tvm-ffi == 0.1.8", "pyyaml >= 6.0.2", "openai", "modelscope", @@ -36,8 +36,8 @@ dependencies=[ cuda = ["tilelang"] [project.scripts] -mllm-convertor = "pymllm.utils.mllm_convertor:main" -mllm-service = "pymllm.service.tools:cli_app" +mllm-convertor = "pymllm.mobile.utils.mllm_convertor:main" +mllm-service = "pymllm.mobile.service.tools:cli_app" [tool.setuptools.exclude-package-data] "*" = ["*.pyc"] From abf1fa4228e05cbd565c4b35e5c6dfeba804ec5f Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 18 Feb 2026 04:46:41 +0000 Subject: [PATCH 03/13] feat: enhance configuration management and update dependencies - Added `flashinfer-python` to the optional `cuda` dependencies in `pyproject.toml`. - Introduced new configuration files for server, model, and layers to centralize runtime settings. - Created initial structure for various layers and components to support future development. 
--- pymllm/configs/__init__.py | 0 pymllm/configs/model_config.py | 0 pymllm/configs/server_config.py | 267 ++++++++++++++++++++++++++++ pymllm/layers/_layer.py | 0 pymllm/layers/attention/__init__.py | 0 pymllm/layers/attention/gdn.py | 0 pymllm/layers/attention/normal.py | 0 pymllm/layers/embedding.py | 0 pymllm/layers/mlp.py | 0 pymllm/layers/rms_norm.py | 0 pymllm/mem_cache/__init__.py | 0 pymllm/models/qwen3_moe.py | 0 pymllm/orchestrator/__init__.py | 0 pymllm/server/__init__.py | 0 pyproject.toml | 2 +- 15 files changed, 268 insertions(+), 1 deletion(-) create mode 100644 pymllm/configs/__init__.py create mode 100644 pymllm/configs/model_config.py create mode 100644 pymllm/configs/server_config.py create mode 100644 pymllm/layers/_layer.py create mode 100644 pymllm/layers/attention/__init__.py create mode 100644 pymllm/layers/attention/gdn.py create mode 100644 pymllm/layers/attention/normal.py create mode 100644 pymllm/layers/embedding.py create mode 100644 pymllm/layers/mlp.py create mode 100644 pymllm/layers/rms_norm.py create mode 100644 pymllm/mem_cache/__init__.py create mode 100644 pymllm/models/qwen3_moe.py create mode 100644 pymllm/orchestrator/__init__.py create mode 100644 pymllm/server/__init__.py diff --git a/pymllm/configs/__init__.py b/pymllm/configs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/configs/model_config.py b/pymllm/configs/model_config.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/configs/server_config.py b/pymllm/configs/server_config.py new file mode 100644 index 00000000..56be4fc4 --- /dev/null +++ b/pymllm/configs/server_config.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Literal, Optional +from dataclasses import asdict, dataclass, field + + +@dataclass +class ServerConfig: + """ + Centralized runtime configuration for the MLLM server. 
+ + The fields are grouped by operational concern so that: + - CLI args can map directly to this dataclass. + - YAML/JSON config files can be loaded and validated in one place. + - future extensions can follow a predictable structure. + """ + + # ------------------------------------------------------------------------- + # Model and tokenizer settings + # ------------------------------------------------------------------------- + # Required path to the model checkpoint directory or model identifier. + model_path: Path + # Optional tokenizer path; when omitted we fall back to `model_path`. + tokenizer_path: Optional[Path] = None + # Tokenizer bootstrap strategy: + # - "auto": infer tokenizer mode from model type. + # - "slow"/"fast": force a specific tokenizer implementation. + tokenizer_mode: Literal["auto", "slow", "fast"] = "auto" + # Number of worker threads/processes used by tokenizer service. + tokenizer_worker_num: int = 1 + # Skip tokenizer initialization at startup to reduce cold-start latency. + skip_tokenizer_init: bool = False + # Model loading format hint for loader backends. + load_format: Literal["auto", "pt", "safetensors", "gguf"] = "auto" + # Allow loading custom model code from remote repositories. + trust_remote_code: bool = False + # Explicit context length; `None` means infer from model config. + context_length: Optional[int] = None + # Model precision policy for weights and activations. + dtype: Literal["auto", "float16", "bfloat16", "float32"] = "auto" + # Quantization algorithm to apply at load time. + quantization: Optional[str] = None + # KV cache dtype; can differ from model dtype for better memory trade-offs. + kv_cache_dtype: Literal["auto", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"] = ( + "auto" + ) + # HuggingFace revision/commit/tag for deterministic model resolution. + revision: Optional[str] = None + # Optional custom directory used to cache downloaded model artifacts. 
+ download_dir: Optional[Path] = None + + # ------------------------------------------------------------------------- + # HTTP / API server settings + # ------------------------------------------------------------------------- + # Host address the HTTP server binds to. + host: str = "127.0.0.1" + # TCP port exposed by the HTTP server. + port: int = 30000 + # Optional FastAPI root path when running behind a reverse proxy. + fastapi_root_path: str = "" + # API key required by client-facing endpoints. + api_key: Optional[str] = None + # Admin API key for privileged management endpoints. + admin_api_key: Optional[str] = None + # Public model name returned in OpenAI-compatible API responses. + served_model_name: Optional[str] = None + # Path used for server-side file uploads or temporary user artifacts. + file_storage_path: Path = Path("mllm_storage") + + # ------------------------------------------------------------------------- + # Runtime and scheduling behavior + # ------------------------------------------------------------------------- + # Fraction of total GPU memory reserved for static allocations + # (primarily model weights + KV cache). + mem_fraction_static: Optional[float] = None + # Maximum number of requests concurrently executing in scheduler. + max_running_requests: Optional[int] = None + # Maximum queued requests waiting for execution. + max_queued_requests: Optional[int] = None + # Hard cap of total active tokens across all in-flight requests. + max_total_tokens: Optional[int] = None + # Prefill chunk size used to trade throughput vs memory pressure. + chunked_prefill_size: Optional[int] = None + # Upper bound for tokens accepted in a single prefill pass. + max_prefill_tokens: int = 16384 + # Scheduling policy: + # - "fcfs": first-come-first-served fairness. + # - "lpm": longest-prefix-match style cache locality optimization. + schedule_policy: Literal["fcfs", "lpm"] = "fcfs" + # Conservative multiplier for scheduler admission decisions. 
+ # Values > 1.0 are safer for OOM avoidance but may reduce utilization. + schedule_conservativeness: float = 1.0 + # Enable low-power sleep while idle to reduce background GPU usage. + sleep_on_idle: bool = False + # Stream partial output every N decode steps when streaming is enabled. + stream_interval: int = 1 + # Enable token streaming in generation responses. + stream_output: bool = True + + # ------------------------------------------------------------------------- + # Parallelism and distributed deployment + # ------------------------------------------------------------------------- + # Tensor parallel size (intra-layer sharding). + tp_size: int = 1 + # Data parallel size (replicated model workers). + dp_size: int = 1 + # Expert parallel size for MoE-style models. + ep_size: int = 1 + # Pipeline parallel size (inter-layer partitioning). + pp_size: int = 1 + # Number of nodes participating in distributed serving. + nnodes: int = 1 + # Rank of current node in multi-node topology. + node_rank: int = 0 + # Torch distributed init address, e.g. "host:port". + dist_init_addr: Optional[str] = None + # Optional NCCL communication port override. + nccl_port: Optional[int] = None + # Timeout in seconds for distributed collectives. + dist_timeout: Optional[int] = None + # Base GPU index used for process-to-device mapping. + base_gpu_id: int = 0 + # Step size between logical workers when assigning GPU IDs. + gpu_id_step: int = 1 + + # ------------------------------------------------------------------------- + # Backend and acceleration toggles + # ------------------------------------------------------------------------- + # Attention kernel backend selection. + attention_backend: Optional[str] = None + # Sampling backend selection. + sampling_backend: Optional[str] = None + # Grammar-constrained decoding backend. + grammar_backend: Optional[str] = None + # Disable CUDA graph capture for debugging/compatibility. 
+ disable_cuda_graph: bool = False + # Enable `torch.compile` acceleration path. + enable_torch_compile: bool = False + # Maximum batch size considered by `torch.compile` profiles. + torch_compile_max_bs: int = 32 + # Enable deterministic inference behavior where possible. + enable_deterministic_inference: bool = False + # Random seed for reproducible sampling and initialization. + random_seed: Optional[int] = None + + # ------------------------------------------------------------------------- + # Logging, metrics, and observability + # ------------------------------------------------------------------------- + # Global log level for server components. + log_level: Literal["debug", "info", "warning", "error", "critical"] = "info" + # HTTP access log level; if None, inherits global log level. + log_level_http: Optional[str] = None + # Log each request payload/metadata for debugging. + log_requests: bool = False + # Verbosity level for request logging, larger means more detail. + log_requests_level: int = 2 + # Toggle built-in Prometheus/metrics endpoint. + enable_metrics: bool = False + # Include latency/time-cost summaries in logs. + show_time_cost: bool = False + # Optional OpenTelemetry traces endpoint ("host:port"). + otlp_traces_endpoint: str = "localhost:4317" + # Enable tracing export to OTLP collector. + enable_trace: bool = False + + # ------------------------------------------------------------------------- + # Feature switches and advanced decoding options + # ------------------------------------------------------------------------- + # Enable LoRA adapter serving support. + enable_lora: bool = False + # Maximum number of LoRA adapters loaded simultaneously. + max_loaded_loras: Optional[int] = None + # Maximum LoRA adapters that can be mixed in one batch. + max_loras_per_batch: int = 8 + # LoRA backend implementation. + lora_backend: Literal["triton", "csgmv", "torch_native"] = "csgmv" + # Enable multimodal processing pipeline. 
+ enable_multimodal: bool = False + # Max concurrent multimodal tool calls. + mm_max_concurrent_calls: int = 32 + # Timeout (seconds) for each multimodal call. + mm_per_request_timeout: float = 10.0 + # Speculative decoding algorithm name (e.g. "eagle", "ngram"). + speculative_algorithm: Optional[str] = None + # Draft model path used in speculative decoding. + speculative_draft_model_path: Optional[Path] = None + # Number of speculative steps per target decode iteration. + speculative_num_steps: Optional[int] = None + # Number of proposed draft tokens per speculation step. + speculative_num_draft_tokens: Optional[int] = None + + # ------------------------------------------------------------------------- + # Internal bookkeeping (not usually set by users directly) + # ------------------------------------------------------------------------- + # Additional arbitrary key-value options for forward compatibility. + extra_options: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + """Normalize defaults and validate constraints after dataclass initialization.""" + if self.tokenizer_path is None: + self.tokenizer_path = self.model_path + if self.served_model_name is None: + self.served_model_name = str(self.model_path) + + self._validate_basic_constraints() + self._validate_parallelism_constraints() + self._validate_scheduler_constraints() + + def _validate_basic_constraints(self) -> None: + """Validate scalar ranges and common invariants.""" + if self.port <= 0 or self.port > 65535: + raise ValueError("`port` must be in range [1, 65535].") + if self.max_prefill_tokens <= 0: + raise ValueError("`max_prefill_tokens` must be greater than 0.") + if self.stream_interval <= 0: + raise ValueError("`stream_interval` must be greater than 0.") + if self.mem_fraction_static is not None and not ( + 0.0 < self.mem_fraction_static < 1.0 + ): + raise ValueError("`mem_fraction_static` must be in range (0.0, 1.0).") + + def 
_validate_parallelism_constraints(self) -> None: + """Validate distributed and parallel topology settings.""" + for key, value in { + "tp_size": self.tp_size, + "dp_size": self.dp_size, + "ep_size": self.ep_size, + "pp_size": self.pp_size, + "nnodes": self.nnodes, + }.items(): + if value <= 0: + raise ValueError(f"`{key}` must be greater than 0.") + + if self.node_rank < 0 or self.node_rank >= self.nnodes: + raise ValueError("`node_rank` must satisfy 0 <= node_rank < nnodes.") + + def _validate_scheduler_constraints(self) -> None: + """Validate scheduler-related soft limits.""" + if self.max_running_requests is not None and self.max_running_requests <= 0: + raise ValueError("`max_running_requests` must be greater than 0 when set.") + if self.max_queued_requests is not None and self.max_queued_requests < 0: + raise ValueError("`max_queued_requests` must be >= 0 when set.") + if self.max_total_tokens is not None and self.max_total_tokens <= 0: + raise ValueError("`max_total_tokens` must be greater than 0 when set.") + if self.chunked_prefill_size is not None and self.chunked_prefill_size <= 0: + raise ValueError("`chunked_prefill_size` must be greater than 0 when set.") + if self.schedule_conservativeness <= 0: + raise ValueError("`schedule_conservativeness` must be greater than 0.") + + def to_dict(self) -> dict[str, Any]: + """ + Serialize config to a plain dictionary. + + Path values are converted to string for easier JSON/YAML serialization. 
+ """ + data = asdict(self) + for key in [ + "model_path", + "tokenizer_path", + "download_dir", + "file_storage_path", + "speculative_draft_model_path", + ]: + if data.get(key) is not None: + data[key] = str(data[key]) + return data diff --git a/pymllm/layers/_layer.py b/pymllm/layers/_layer.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/attention/__init__.py b/pymllm/layers/attention/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/attention/gdn.py b/pymllm/layers/attention/gdn.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/attention/normal.py b/pymllm/layers/attention/normal.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/embedding.py b/pymllm/layers/embedding.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/mlp.py b/pymllm/layers/mlp.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/rms_norm.py b/pymllm/layers/rms_norm.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/mem_cache/__init__.py b/pymllm/mem_cache/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/models/qwen3_moe.py b/pymllm/models/qwen3_moe.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/orchestrator/__init__.py b/pymllm/orchestrator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/server/__init__.py b/pymllm/server/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pyproject.toml b/pyproject.toml index efe4a14d..89d69947 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies=[ ] [project.optional-dependencies] -cuda = ["tilelang"] +cuda = ["tilelang", "flashinfer-python"] [project.scripts] mllm-convertor = "pymllm.mobile.utils.mllm_convertor:main" From ec71258940415868080af0a070e637e4cf0ca2f9 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Wed, 18 Feb 2026 11:40:56 
+0000 Subject: [PATCH 04/13] feat: add main entry points and configuration for pymllm and mllm-kernel - Added main entry points for `pymllm` and `mllm-kernel` in their respective `pyproject.toml` files. - Implemented a configuration module for `pymllm` to manage global settings, including server, model, runtime, and cache configurations. - Introduced the `VocabParallelEmbedding` layer and utility functions for weight management in the layers module. - Created initial tests for the `VocabParallelEmbedding` layer to validate functionality with tensor parallelism. --- mllm-kernel/mllm_kernel/__main__.py | 2 +- mllm-kernel/pyproject.toml | 3 + pymllm/__main__.py | 39 ++ pymllm/configs/__init__.py | 21 ++ pymllm/configs/global_config.py | 349 ++++++++++++++++++ .../configs/quantization_config.py | 0 pymllm/layers/__init__.py | 11 + pymllm/layers/base.py | 27 ++ pymllm/layers/{_layer.py => custom_event.py} | 0 pymllm/layers/embedding.py | 152 ++++++++ pymllm/layers/utils.py | 45 +++ pymllm/orchestrator/__init__.py | 48 +++ pymllm/orchestrator/group_coordinator.py | 98 +++++ pymllm/orchestrator/parallel_state.py | 207 +++++++++++ pymllm/quantization/__init__.py | 0 pymllm/quantization/methods/__init__.py | 0 pymllm/quantization/methods/awq_w4a16.py | 0 pymllm/quantization/quant_recipe.py | 3 + pymllm/tests/README.md | 0 pymllm/tests/test_vocab_parallel_embedding.py | 310 ++++++++++++++++ pyproject.toml | 1 + 21 files changed, 1315 insertions(+), 1 deletion(-) create mode 100644 pymllm/configs/global_config.py rename mllm-kernel/requirements.txt => pymllm/configs/quantization_config.py (100%) create mode 100644 pymllm/layers/base.py rename pymllm/layers/{_layer.py => custom_event.py} (100%) create mode 100644 pymllm/layers/utils.py create mode 100644 pymllm/orchestrator/group_coordinator.py create mode 100644 pymllm/orchestrator/parallel_state.py create mode 100644 pymllm/quantization/__init__.py create mode 100644 pymllm/quantization/methods/__init__.py create mode 100644 
pymllm/quantization/methods/awq_w4a16.py create mode 100644 pymllm/quantization/quant_recipe.py create mode 100644 pymllm/tests/README.md create mode 100644 pymllm/tests/test_vocab_parallel_embedding.py diff --git a/mllm-kernel/mllm_kernel/__main__.py b/mllm-kernel/mllm_kernel/__main__.py index d4888b86..e5f0779d 100644 --- a/mllm-kernel/mllm_kernel/__main__.py +++ b/mllm-kernel/mllm_kernel/__main__.py @@ -388,7 +388,7 @@ def main() -> None: logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") parser = argparse.ArgumentParser( - prog="python -m mllm_kernel", + prog="mllm_kernel", description="mllm-kernel helper commands.", ) parser.add_argument( diff --git a/mllm-kernel/pyproject.toml b/mllm-kernel/pyproject.toml index 5fe07eea..a8dbd98e 100644 --- a/mllm-kernel/pyproject.toml +++ b/mllm-kernel/pyproject.toml @@ -27,6 +27,9 @@ dev = [ "pytest-html", ] +[project.scripts] +mllm-kernel = "mllm_kernel.__main__:main" + [tool.scikit-build] # Build configuration wheel.py-api = "py3" diff --git a/pymllm/__main__.py b/pymllm/__main__.py index e69de29b..0b427fce 100644 --- a/pymllm/__main__.py +++ b/pymllm/__main__.py @@ -0,0 +1,39 @@ +def show_config() -> None: + from . import is_mobile_available + + mobile_enabled = str(is_mobile_available()).lower() + print(f"mllm mobile: {mobile_enabled}") + + # try import mllm_kernel, if true, print mllm_kernel config + try: + import mllm_kernel + + print(f"mllm_kernel: {mllm_kernel.__version__}") + except ImportError: + print("mllm_kernel: not found") + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser( + prog="pymllm", + description="pymllm helper commands.", + ) + parser.add_argument( + "command", + nargs="?", + choices=["show-config"], + help="Run helper command. 
Use 'show-config' to print config details.", + ) + args = parser.parse_args() + + if args.command == "show-config": + show_config() + return + + parser.print_help() + + +if __name__ == "__main__": + main() diff --git a/pymllm/configs/__init__.py b/pymllm/configs/__init__.py index e69de29b..86af57be 100644 --- a/pymllm/configs/__init__.py +++ b/pymllm/configs/__init__.py @@ -0,0 +1,21 @@ +"""Configuration module for pymllm.""" + +from pymllm.configs.global_config import ( + CacheConfig, + GlobalConfig, + ModelConfig, + RuntimeConfig, + get_global_config, +) +from pymllm.configs.server_config import ServerConfig + +__all__ = [ + # Main singleton + "GlobalConfig", + "get_global_config", + # Sub configs + "ServerConfig", + "ModelConfig", + "RuntimeConfig", + "CacheConfig", +] diff --git a/pymllm/configs/global_config.py b/pymllm/configs/global_config.py new file mode 100644 index 00000000..43783e94 --- /dev/null +++ b/pymllm/configs/global_config.py @@ -0,0 +1,349 @@ +"""Global configuration singleton with all server, model and runtime configs.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, Literal, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from transformers import PretrainedConfig + + +@dataclass +class ModelConfig: + """Model-specific configuration parsed from HF config. + + This is a lightweight wrapper around HuggingFace config with + additional derived fields for efficiency. 
+ """ + # Original HF config (populated after loading) + hf_config: Optional[Any] = field(default=None, repr=False) + hf_text_config: Optional[Any] = field(default=None, repr=False) + + # Model architecture + model_type: str = "unknown" + architectures: list[str] = field(default_factory=list) + + # Dimensions + hidden_size: int = 0 + num_hidden_layers: int = 0 + num_attention_heads: int = 0 + num_key_value_heads: Optional[int] = None + intermediate_size: int = 0 + vocab_size: int = 0 + + # Context length + max_position_embeddings: int = 0 + context_length: int = 0 # effective context length + + # Normalization + rms_norm_eps: float = 1e-6 + tie_word_embeddings: bool = False + + # RoPE + rope_theta: float = 10000.0 + rope_scaling: Optional[Dict[str, Any]] = None + + # Quantization + quantization: Optional[str] = None + + def __post_init__(self): + """Set default kv heads if not specified.""" + if self.num_key_value_heads is None: + self.num_key_value_heads = self.num_attention_heads + + +@dataclass +class RuntimeConfig: + """Runtime state that changes during execution.""" + + # Distributed state + tp_rank: int = 0 + tp_size: int = 1 + dp_rank: int = 0 + dp_size: int = 1 + pp_rank: int = 0 + pp_size: int = 1 + world_rank: int = 0 + world_size: int = 1 + local_rank: int = 0 + + # Device + device: str = "cuda" + + # Memory pools + max_num_seqs: int = 0 + max_model_len: int = 0 + + # Scheduler state (mutable during runtime) + num_running_reqs: int = 0 + num_waiting_reqs: int = 0 + num_swapped_reqs: int = 0 + + +@dataclass +class CacheConfig: + """KV cache configuration.""" + + block_size: int = 16 + num_gpu_blocks: int = 0 + num_cpu_blocks: int = 0 + + # Cache dtype + cache_dtype: Literal["auto", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"] = "auto" + + # Sliding window + sliding_window: Optional[int] = None + + # Prefix caching + enable_prefix_caching: bool = False + + +@dataclass +class GlobalConfig: + """Global configuration singleton containing all configs. 
+ + This is the single source of truth for all configuration in pymllm. + It aggregates ServerConfig, ModelConfig, RuntimeConfig, and CacheConfig. + + Usage: + >>> from pymllm.configs import get_global_config + >>> config = get_global_config() + >>> + >>> # Access server config + >>> config.server.model_path + >>> config.server.tp_size + >>> + >>> # Access model config + >>> config.model.hidden_size + >>> config.model.vocab_size + >>> + >>> # Access runtime config (mutable) + >>> config.runtime.tp_rank + >>> config.runtime.device + >>> + >>> # Access cache config + >>> config.cache.block_size + >>> + >>> # Update with new server config + >>> config.load_server_config(server_config) + >>> + >>> # Update with HF model config + >>> config.load_hf_config(hf_config) + """ + + # Sub-configs + server: "ServerConfig" = field(default=None, repr=False) + model: ModelConfig = field(default_factory=ModelConfig) + runtime: RuntimeConfig = field(default_factory=RuntimeConfig) + cache: CacheConfig = field(default_factory=CacheConfig) + + # Additional metadata + _initialized: bool = field(default=False, repr=False) + + def __new__(cls): + if not hasattr(cls, '_instance') or cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __post_init__(self): + # Lazy import to avoid circular dependency + if self.server is None: + from pymllm.configs.server_config import ServerConfig + self.server = ServerConfig( + model_path=Path("."), # placeholder + ) + + @classmethod + def get_instance(cls) -> "GlobalConfig": + """Get the singleton instance.""" + if not hasattr(cls, '_instance') or cls._instance is None: + cls._instance = cls() + return cls._instance + + def load_server_config(self, server_config: "ServerConfig") -> None: + """Load server configuration and sync related fields.""" + self.server = server_config + + # Sync tp/dp/pp sizes to runtime + self.runtime.tp_size = server_config.tp_size + self.runtime.dp_size = server_config.dp_size + 
self.runtime.pp_size = server_config.pp_size + self.runtime.device = "cuda" if server_config.base_gpu_id >= 0 else "cpu" + + self._initialized = True + + def load_hf_config(self, hf_config: "PretrainedConfig") -> None: + """Load HuggingFace model configuration.""" + from transformers import PretrainedConfig + + # Store original + self.model.hf_config = hf_config + + # Get text config (for multimodal models) + if hasattr(hf_config, "text_config"): + self.model.hf_text_config = hf_config.text_config + text_config = hf_config.text_config + else: + text_config = hf_config + self.model.hf_text_config = hf_config + + # Extract fields + self.model.model_type = getattr(text_config, "model_type", "unknown") + self.model.architectures = getattr(text_config, "architectures", []) + + self.model.hidden_size = getattr(text_config, "hidden_size", 0) + self.model.num_hidden_layers = getattr(text_config, "num_hidden_layers", 0) + self.model.num_attention_heads = getattr(text_config, "num_attention_heads", 0) + self.model.num_key_value_heads = getattr(text_config, "num_key_value_heads", None) + self.model.intermediate_size = getattr(text_config, "intermediate_size", 0) + self.model.vocab_size = getattr(text_config, "vocab_size", 0) + + # Context length + self.model.max_position_embeddings = getattr( + text_config, "max_position_embeddings", 0 + ) + self.model.context_length = self._get_context_length(text_config) + + # Normalization + self.model.rms_norm_eps = getattr(text_config, "rms_norm_eps", 1e-6) + self.model.tie_word_embeddings = getattr( + text_config, "tie_word_embeddings", False + ) + + # RoPE + self.model.rope_theta = getattr(text_config, "rope_theta", 10000.0) + self.model.rope_scaling = getattr(text_config, "rope_scaling", None) + + # Sync to cache config + self.cache.sliding_window = getattr(text_config, "sliding_window", None) + + def _get_context_length(self, config: "PretrainedConfig") -> int: + """Extract effective context length from config.""" + # Try various 
fields + for key in ["max_position_embeddings", "n_positions", "seq_length"]: + if hasattr(config, key): + value = getattr(config, key) + if isinstance(value, int) and value > 0: + return value + return 2048 # default + + def update_runtime(self, **kwargs) -> None: + """Update runtime configuration.""" + for key, value in kwargs.items(): + if hasattr(self.runtime, key): + setattr(self.runtime, key, value) + else: + raise AttributeError(f"RuntimeConfig has no attribute '{key}'") + + def update_cache(self, **kwargs) -> None: + """Update cache configuration.""" + for key, value in kwargs.items(): + if hasattr(self.cache, key): + setattr(self.cache, key, value) + else: + raise AttributeError(f"CacheConfig has no attribute '{key}'") + + def temp(self, **kwargs): + """Context manager for temporary config changes. + + Usage: + # Modify runtime config temporarily + with config.temp(runtime=config.runtime): + config.runtime.tp_size = 2 + # ... do something with tp_size=2 + # runtime restored to original values + """ + return _TempGlobalConfig(self, **kwargs) + + def to_dict(self) -> Dict[str, Any]: + """Serialize all configs to dictionary.""" + return { + "server": self.server.to_dict() if self.server else {}, + "model": self._model_to_dict(), + "runtime": self._runtime_to_dict(), + "cache": self._cache_to_dict(), + } + + def _model_to_dict(self) -> Dict[str, Any]: + """Convert model config to dict.""" + return { + "model_type": self.model.model_type, + "architectures": self.model.architectures, + "hidden_size": self.model.hidden_size, + "num_hidden_layers": self.model.num_hidden_layers, + "num_attention_heads": self.model.num_attention_heads, + "num_key_value_heads": self.model.num_key_value_heads, + "intermediate_size": self.model.intermediate_size, + "vocab_size": self.model.vocab_size, + "context_length": self.model.context_length, + } + + def _runtime_to_dict(self) -> Dict[str, Any]: + """Convert runtime config to dict.""" + return { + "tp_rank": self.runtime.tp_rank, 
+ "tp_size": self.runtime.tp_size, + "world_rank": self.runtime.world_rank, + "world_size": self.runtime.world_size, + "device": self.runtime.device, + } + + def _cache_to_dict(self) -> Dict[str, Any]: + """Convert cache config to dict.""" + return { + "block_size": self.cache.block_size, + "num_gpu_blocks": self.cache.num_gpu_blocks, + "cache_dtype": self.cache.cache_dtype, + } + + +class _TempGlobalConfig: + """Context manager for temporary global config changes. + + Supports nested keys like "runtime.tp_size" to modify sub-configs. + """ + + def __init__(self, config: GlobalConfig, **kwargs): + self.config = config + self.temp_values = kwargs + self.old_values = {} + + def _get_nested_attr(self, key: str): + """Get attribute, supporting dot notation for nested access.""" + if "." in key: + parts = key.split(".") + obj = self.config + for part in parts[:-1]: + obj = getattr(obj, part) + return getattr(obj, parts[-1]) + return getattr(self.config, key) + + def _set_nested_attr(self, key: str, value): + """Set attribute, supporting dot notation for nested access.""" + if "." 
in key: + parts = key.split(".") + obj = self.config + for part in parts[:-1]: + obj = getattr(obj, part) + setattr(obj, parts[-1], value) + else: + setattr(self.config, key, value) + + def __enter__(self): + for key, value in self.temp_values.items(): + self.old_values[key] = self._get_nested_attr(key) + self._set_nested_attr(key, value) + return self.config + + def __exit__(self, exc_type, exc_val, exc_tb): + for key, value in self.old_values.items(): + self._set_nested_attr(key, value) + return False + + +# Convenience function +def get_global_config() -> GlobalConfig: + """Get the global config singleton instance.""" + return GlobalConfig.get_instance() diff --git a/mllm-kernel/requirements.txt b/pymllm/configs/quantization_config.py similarity index 100% rename from mllm-kernel/requirements.txt rename to pymllm/configs/quantization_config.py diff --git a/pymllm/layers/__init__.py b/pymllm/layers/__init__.py index e69de29b..6f70a4d1 100644 --- a/pymllm/layers/__init__.py +++ b/pymllm/layers/__init__.py @@ -0,0 +1,11 @@ +"""Layers module for pymllm.""" + +from pymllm.layers.base import MllmBaseLayer +from pymllm.layers.embedding import VocabParallelEmbedding +from pymllm.layers.utils import set_weight_attrs + +__all__ = [ + "MllmBaseLayer", + "set_weight_attrs", + "VocabParallelEmbedding", +] diff --git a/pymllm/layers/base.py b/pymllm/layers/base.py new file mode 100644 index 00000000..5dc519f4 --- /dev/null +++ b/pymllm/layers/base.py @@ -0,0 +1,27 @@ +import torch +from torch import nn +from torch.nn import Parameter +from pymllm.layers.utils import set_weight_attrs +from pymllm.quantization.quant_recipe import QuantRecipe + + +class MllmBaseLayer(nn.Module): + def __init__(self): + super().__init__() + self.quant_recipe: QuantRecipe = None + + def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): + """Load weights into a parameter. + + This is the default implementation that directly copies the loaded weight + into the parameter. 
Subclasses should override this method to implement + custom loading logic (e.g., tensor parallelism sharding). + + Args: + param: The parameter to load weights into. + loaded_weight: The weight tensor loaded from checkpoint. + """ + param.data.copy_(loaded_weight) + + def forward(self, *args, **kwargs): + raise NotImplementedError("Subclasses must implement forward method") diff --git a/pymllm/layers/_layer.py b/pymllm/layers/custom_event.py similarity index 100% rename from pymllm/layers/_layer.py rename to pymllm/layers/custom_event.py diff --git a/pymllm/layers/embedding.py b/pymllm/layers/embedding.py index e69de29b..0442caa4 100644 --- a/pymllm/layers/embedding.py +++ b/pymllm/layers/embedding.py @@ -0,0 +1,152 @@ +import torch +import torch.nn.functional as F +from torch.nn import Parameter + +from pymllm.layers.base import MllmBaseLayer +from pymllm.layers.utils import set_weight_attrs +from pymllm.orchestrator import ( + divide, + get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, + tensor_model_parallel_all_reduce, +) + + +class VocabParallelEmbedding(MllmBaseLayer): + """Embedding layer with vocabulary parallelism. + + This layer shards the embedding table along the vocabulary dimension + for tensor parallelism. + + Args: + num_embeddings: Size of the vocabulary. + embedding_dim: Size of the embedding vector. + padding_idx: Index for padding token (optional). 
+ """ + + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + padding_idx: int = None, + ): + super().__init__() + + # Get TP info from global state + self.tp_rank = get_tensor_model_parallel_rank() + self.tp_size = get_tensor_model_parallel_world_size() + + self.num_embeddings = num_embeddings + self.embedding_dim = embedding_dim + self.padding_idx = padding_idx + + # Calculate sharded size + if self.num_embeddings % self.tp_size != 0: + raise ValueError( + f"num_embeddings ({num_embeddings}) must be divisible by " + f"tp_size ({self.tp_size})" + ) + + self.num_embeddings_per_partition = divide(num_embeddings, self.tp_size) + + # Create sharded weight + self.weight = Parameter( + torch.empty(self.num_embeddings_per_partition, embedding_dim) + ) + + # Calculate shard range + self.vocab_start_index = self.tp_rank * self.num_embeddings_per_partition + self.vocab_end_index = ( + self.vocab_start_index + self.num_embeddings_per_partition + ) + + # Set weight attributes for loading + set_weight_attrs( + self.weight, + { + "output_dim": 0, # Shard along vocab dimension + "input_dim": 1, # Embedding dimension + "weight_loader": self.weight_loader, + }, + ) + + def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): + """Load sharded weights into the parameter. + + Args: + param: The parameter to load weights into. + loaded_weight: The weight tensor loaded from checkpoint (full size). 
+ """ + output_dim = getattr(param, "output_dim", None) + + if output_dim is None or self.tp_size == 1: + # No sharding, direct copy + assert param.data.shape == loaded_weight.shape, ( + f"Shape mismatch: param {param.data.shape} vs " + f"loaded {loaded_weight.shape}" + ) + param.data.copy_(loaded_weight) + else: + # Sharded loading: slice the loaded weight + assert loaded_weight.shape[output_dim] == self.num_embeddings, ( + f"Loaded weight vocab size {loaded_weight.shape[output_dim]} " + f"does not match expected {self.num_embeddings}" + ) + + # Slice along vocab dimension + if output_dim == 0: + shard_weight = loaded_weight[ + self.vocab_start_index : self.vocab_end_index, : + ] + else: + shard_weight = loaded_weight.narrow( + output_dim, + self.vocab_start_index, + self.num_embeddings_per_partition, + ) + + assert param.data.shape == shard_weight.shape, ( + f"Shard shape mismatch: param {param.data.shape} vs " + f"shard {shard_weight.shape}" + ) + param.data.copy_(shard_weight) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Forward pass of the embedding layer with TP support. + + Args: + x: Input tensor of token ids. + + Returns: + Embedded representation (all-reduced across TP group if needed). 
+ """ + if self.tp_size > 1: + # Create mask for valid vocab range + vocab_mask = (x >= self.vocab_start_index) & (x < self.vocab_end_index) + + # Adjust indices to local vocab space + masked_input = torch.where( + vocab_mask, + x - self.vocab_start_index, + torch.zeros_like(x), # Invalid indices become 0 (will be masked) + ) + else: + masked_input = x + vocab_mask = None + + # Lookup embeddings + output = F.embedding( + masked_input.long(), + self.weight, + padding_idx=self.padding_idx if self.padding_idx is not None else None, + ) + + # Mask invalid positions (for TP) + if vocab_mask is not None: + output.masked_fill_(~vocab_mask.unsqueeze(-1), 0) + + # All-reduce across TP group + if self.tp_size > 1: + output = tensor_model_parallel_all_reduce(output) + + return output diff --git a/pymllm/layers/utils.py b/pymllm/layers/utils.py new file mode 100644 index 00000000..0dcbd1ac --- /dev/null +++ b/pymllm/layers/utils.py @@ -0,0 +1,45 @@ +"""Utility functions for layers.""" + +from typing import Any, Dict + +import torch + + +def set_weight_attrs( + weight: torch.Tensor, + weight_attrs: Dict[str, Any] | None, +) -> None: + """Set attributes on a weight tensor. + + This method is used to set attributes on a weight tensor. This method + will not overwrite existing attributes. + + Args: + weight: The weight tensor or parameter. + weight_attrs: A dictionary of attributes to set on the weight tensor. + Common attributes include: + - output_dim: The dimension along which to shard the weight (typically 0 for output dim) + - input_dim: The input dimension (typically 1 for input dim) + - weight_loader: A callable to load weights into this parameter + - packed_dim: The dimension along which the weight is packed (for quantization) + - packed_factor: The packing factor (for quantization) + + Example: + >>> weight = nn.Parameter(torch.empty(100, 64)) + >>> set_weight_attrs(weight, { + ... "output_dim": 0, + ... "input_dim": 1, + ... "weight_loader": my_loader_func, + ... 
}) + """ + if weight_attrs is None: + return + + for key, value in weight_attrs.items(): + if hasattr(weight, key): + raise AttributeError( + f"Overwriting existing tensor attribute: {key}. " + f"Existing value: {getattr(weight, key)}, " + f"New value: {value}" + ) + setattr(weight, key, value) diff --git a/pymllm/orchestrator/__init__.py b/pymllm/orchestrator/__init__.py index e69de29b..f1716d79 100644 --- a/pymllm/orchestrator/__init__.py +++ b/pymllm/orchestrator/__init__.py @@ -0,0 +1,48 @@ +"""Orchestrator module for distributed computation.""" + +from pymllm.orchestrator.group_coordinator import ( + GroupCoordinator, + divide, + split_tensor_along_dim, +) +from pymllm.orchestrator.parallel_state import ( + data_parallel_all_reduce, + get_data_parallel_rank, + get_data_parallel_world_size, + get_dp_group, + get_pipeline_model_parallel_rank, + get_pipeline_model_parallel_world_size, + get_pp_group, + get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, + get_tp_group, + initialize_model_parallel, + model_parallel_is_initialized, + tensor_model_parallel_all_gather, + tensor_model_parallel_all_reduce, +) + +__all__ = [ + # GroupCoordinator + "GroupCoordinator", + "divide", + "split_tensor_along_dim", + # TP + "get_tp_group", + "get_tensor_model_parallel_rank", + "get_tensor_model_parallel_world_size", + "tensor_model_parallel_all_reduce", + "tensor_model_parallel_all_gather", + # DP + "get_dp_group", + "get_data_parallel_rank", + "get_data_parallel_world_size", + "data_parallel_all_reduce", + # PP + "get_pp_group", + "get_pipeline_model_parallel_rank", + "get_pipeline_model_parallel_world_size", + # State + "initialize_model_parallel", + "model_parallel_is_initialized", +] diff --git a/pymllm/orchestrator/group_coordinator.py b/pymllm/orchestrator/group_coordinator.py new file mode 100644 index 00000000..d0624473 --- /dev/null +++ b/pymllm/orchestrator/group_coordinator.py @@ -0,0 +1,98 @@ +"""GroupCoordinator for distributed communication.""" 

from typing import List, Optional

import torch
import torch.distributed as dist


class GroupCoordinator:
    """Manages a group of processes for distributed communication.

    Lightweight wrapper around torch.distributed.ProcessGroup.  When
    torch.distributed is not initialized (or the group has a single member)
    every collective degenerates to a no-op that returns its input tensor.

    Args:
        ranks: List of global ranks in this group
        local_rank: Local rank for device assignment
        backend: Backend to use (nccl, gloo, etc.)
    """

    def __init__(
        self,
        ranks: List[int],
        local_rank: int,
        backend: str = "nccl",
    ):
        self.ranks = ranks
        self.local_rank = local_rank
        self.backend = backend
        self.world_size = len(ranks)

        # Rank of this process *within* the group (position in `ranks`).
        self.rank_in_group = ranks.index(dist.get_rank()) if dist.is_initialized() else 0

        # Create process group only when there is actually something to talk to.
        if dist.is_initialized() and self.world_size > 1:
            self.device_group = dist.new_group(ranks, backend=backend)
        else:
            self.device_group = None

    def all_reduce(self, tensor: torch.Tensor) -> torch.Tensor:
        """All-reduce (in place) across the group; returns the tensor."""
        if self.device_group is not None:
            dist.all_reduce(tensor, group=self.device_group)
        return tensor

    def all_gather(self, tensor: torch.Tensor, dim: int = 0) -> torch.Tensor:
        """All-gather across the group, concatenating along ``dim``."""
        if self.device_group is None:
            return tensor

        world_size = self.world_size
        if dim == 0:
            # Fast path: gather directly into one pre-sized output buffer.
            # all_gather_into_tensor requires a contiguous input.
            tensor = tensor.contiguous()
            shape = list(tensor.shape)
            shape[0] = shape[0] * world_size
            output = torch.empty(shape, dtype=tensor.dtype, device=tensor.device)
            dist.all_gather_into_tensor(output, tensor, group=self.device_group)
            return output
        else:
            # For non-dim-0 gathers, use tensor list
            tensor_list = [
                torch.empty_like(tensor) for _ in range(world_size)
            ]
            dist.all_gather(tensor_list, tensor, group=self.device_group)
            return torch.cat(tensor_list, dim=dim)

    def broadcast(self, tensor: torch.Tensor, src: int = 0) -> torch.Tensor:
        """Broadcast from source rank to all members of the group.

        Args:
            tensor: Tensor to send (on src) / receive into (elsewhere).
            src: Rank *within this group* (index into ``self.ranks``).
        """
        if self.device_group is not None:
            # BUGFIX: dist.broadcast expects a GLOBAL rank.  Passing the
            # group-relative `src` through unchanged was wrong for any group
            # not containing global rank `src`; translate via self.ranks.
            dist.broadcast(tensor, src=self.ranks[src], group=self.device_group)
        return tensor

+def divide(numerator: int, denominator: int) -> int: + """Divide and ensure divisibility.""" + assert numerator % denominator == 0, ( + f"{numerator} is not divisible by {denominator}" + ) + return numerator // denominator + + +def split_tensor_along_dim( + tensor: torch.Tensor, + dim: int, + world_size: int, + rank: int, +) -> torch.Tensor: + """Split tensor along a dimension for tensor parallelism.""" + dim_size = tensor.size(dim) + assert dim_size % world_size == 0, ( + f"Dimension {dim} ({dim_size}) not divisible by world_size {world_size}" + ) + + chunk_size = dim_size // world_size + start = rank * chunk_size + end = start + chunk_size + + slices = [slice(None)] * tensor.ndim + slices[dim] = slice(start, end) + return tensor[tuple(slices)] diff --git a/pymllm/orchestrator/parallel_state.py b/pymllm/orchestrator/parallel_state.py new file mode 100644 index 00000000..545c74a8 --- /dev/null +++ b/pymllm/orchestrator/parallel_state.py @@ -0,0 +1,207 @@ +"""Parallel state management for tensor and pipeline parallelism.""" + +import logging +import torch +import torch.distributed as dist +from typing import Optional + +from pymllm.configs.global_config import get_global_config +from pymllm.orchestrator.group_coordinator import GroupCoordinator + +logger = logging.getLogger(__name__) + + +# Global groups +_TP_GROUP: Optional[GroupCoordinator] = None +_DP_GROUP: Optional[GroupCoordinator] = None +_PP_GROUP: Optional[GroupCoordinator] = None + + +def initialize_model_parallel( + tensor_model_parallel_size: int = 1, + data_parallel_size: int = 1, + pipeline_model_parallel_size: int = 1, + backend: str = "nccl", +) -> None: + """Initialize model parallel groups. 
+ + Args: + tensor_model_parallel_size: Number of GPUs for tensor parallelism + data_parallel_size: Number of GPUs for data parallelism + pipeline_model_parallel_size: Number of stages for pipeline parallelism + backend: Communication backend (nccl for GPU, gloo for CPU) + """ + global _TP_GROUP, _DP_GROUP, _PP_GROUP + + if not dist.is_initialized(): + return + + world_size = dist.get_world_size() + world_rank = dist.get_rank() + local_rank = int(torch.cuda.current_device()) if torch.cuda.is_available() else 0 + + config = get_global_config() + + # Update runtime config + config.runtime.world_size = world_size + config.runtime.world_rank = world_rank + config.runtime.local_rank = local_rank + config.runtime.tp_size = tensor_model_parallel_size + config.runtime.dp_size = data_parallel_size + config.runtime.pp_size = pipeline_model_parallel_size + + # Logging + logger.info( + "Model parallel runtime config set: world_size=%s, world_rank=%s, " + "local_rank=%s, tp_size=%s, dp_size=%s, pp_size=%s", + config.runtime.world_size, + config.runtime.world_rank, + config.runtime.local_rank, + config.runtime.tp_size, + config.runtime.dp_size, + config.runtime.pp_size, + ) + + # Validate parallelism setup + assert ( + tensor_model_parallel_size * data_parallel_size * pipeline_model_parallel_size + == world_size + ), ( + f"TP({tensor_model_parallel_size}) * DP({data_parallel_size}) * " + f"PP({pipeline_model_parallel_size}) != World({world_size})" + ) + + # Create TP groups (intra-layer sharding) + if tensor_model_parallel_size > 1: + num_tp_groups = world_size // tensor_model_parallel_size + for i in range(num_tp_groups): + ranks = list( + range( + i * tensor_model_parallel_size, (i + 1) * tensor_model_parallel_size + ) + ) + if world_rank in ranks: + _TP_GROUP = GroupCoordinator( + ranks=ranks, + local_rank=local_rank, + backend=backend, + ) + config.runtime.tp_rank = _TP_GROUP.rank_in_group + break + else: + _TP_GROUP = None + config.runtime.tp_rank = 0 + + # Create DP groups 
(data replication) + if data_parallel_size > 1: + num_dp_groups = world_size // data_parallel_size + for i in range(num_dp_groups): + ranks = list(range(i, world_size, num_dp_groups)) + if world_rank in ranks: + _DP_GROUP = GroupCoordinator( + ranks=ranks, + local_rank=local_rank, + backend=backend, + ) + config.runtime.dp_rank = _DP_GROUP.rank_in_group + break + else: + _DP_GROUP = None + config.runtime.dp_rank = 0 + + # Create PP groups (inter-layer partitioning) + if pipeline_model_parallel_size > 1: + num_pp_groups = world_size // pipeline_model_parallel_size + for i in range(num_pp_groups): + start = i * pipeline_model_parallel_size + ranks = list(range(start, start + pipeline_model_parallel_size)) + if world_rank in ranks: + _PP_GROUP = GroupCoordinator( + ranks=ranks, + local_rank=local_rank, + backend=backend, + ) + config.runtime.pp_rank = _PP_GROUP.rank_in_group + break + else: + _PP_GROUP = None + config.runtime.pp_rank = 0 + + +def get_tp_group() -> Optional[GroupCoordinator]: + """Get the tensor model parallel group.""" + return _TP_GROUP + + +def get_dp_group() -> Optional[GroupCoordinator]: + """Get the data parallel group.""" + return _DP_GROUP + + +def get_pp_group() -> Optional[GroupCoordinator]: + """Get the pipeline parallel group.""" + return _PP_GROUP + + +# Convenience functions for tensor parallelism +def get_tensor_model_parallel_rank() -> int: + """Get current tensor model parallel rank.""" + return get_global_config().runtime.tp_rank + + +def get_tensor_model_parallel_world_size() -> int: + """Get tensor model parallel world size.""" + return get_global_config().runtime.tp_size + + +def get_data_parallel_rank() -> int: + """Get current data parallel rank.""" + return get_global_config().runtime.dp_rank + + +def get_data_parallel_world_size() -> int: + """Get data parallel world size.""" + return get_global_config().runtime.dp_size + + +def get_pipeline_model_parallel_rank() -> int: + """Get current pipeline parallel rank.""" + return 
get_global_config().runtime.pp_rank + + +def get_pipeline_model_parallel_world_size() -> int: + """Get pipeline parallel world size.""" + return get_global_config().runtime.pp_size + + +def model_parallel_is_initialized() -> bool: + """Check if model parallel is initialized.""" + return _TP_GROUP is not None or _DP_GROUP is not None or _PP_GROUP is not None + + +# Communication helpers +def tensor_model_parallel_all_reduce(tensor: torch.Tensor) -> torch.Tensor: + """All-reduce across TP group.""" + group = get_tp_group() + if group is None: + return tensor + return group.all_reduce(tensor) + + +def tensor_model_parallel_all_gather( + tensor: torch.Tensor, + dim: int = 0, +) -> torch.Tensor: + """All-gather across TP group.""" + group = get_tp_group() + if group is None: + return tensor + return group.all_gather(tensor, dim=dim) + + +def data_parallel_all_reduce(tensor: torch.Tensor) -> torch.Tensor: + """All-reduce across DP group.""" + group = get_dp_group() + if group is None: + return tensor + return group.all_reduce(tensor) diff --git a/pymllm/quantization/__init__.py b/pymllm/quantization/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/quantization/methods/__init__.py b/pymllm/quantization/methods/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/quantization/methods/awq_w4a16.py b/pymllm/quantization/methods/awq_w4a16.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/quantization/quant_recipe.py b/pymllm/quantization/quant_recipe.py new file mode 100644 index 00000000..a5b493be --- /dev/null +++ b/pymllm/quantization/quant_recipe.py @@ -0,0 +1,3 @@ +class QuantRecipe: + def __init__(self): + pass diff --git a/pymllm/tests/README.md b/pymllm/tests/README.md new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/tests/test_vocab_parallel_embedding.py b/pymllm/tests/test_vocab_parallel_embedding.py new file mode 100644 index 00000000..e22b52a5 --- /dev/null +++ 
b/pymllm/tests/test_vocab_parallel_embedding.py @@ -0,0 +1,310 @@ +"""Tests for VocabParallelEmbedding layer. + +This module tests the VocabParallelEmbedding layer with and without +tensor parallelism. +""" + +import os +import logging +import pytest +import torch +import torch.nn as nn +import torch.multiprocessing as mp +from typing import Callable + +from pymllm.configs import get_global_config +from pymllm.layers import VocabParallelEmbedding +from pymllm.orchestrator import ( + initialize_model_parallel, +) + +# Show runtime init logs during test execution. +logging.basicConfig(level=logging.INFO, force=True) +logging.getLogger().setLevel(logging.INFO) + + +# ============================================================================= +# Helper: weight loading +# ============================================================================= +def load_weight(param: nn.Parameter, loaded_weight: torch.Tensor) -> None: + """Load weight using the weight_loader attached to param attribute.""" + weight_loader = getattr(param, "weight_loader", None) + if weight_loader is None: + # Fallback: direct copy + param.data.copy_(loaded_weight) + else: + # Call the loader attached to param + weight_loader(param, loaded_weight) + + +# ============================================================================= +# Real distributed tests with world_size=8 on CUDA +# ============================================================================= +def run_worker_tp8_cuda( + rank: int, + local_rank: int, + world_size: int, + local_world_size: int, + test_func: Callable, + return_dict: dict, +): + """Worker function for multi-process testing with TP=8 on CUDA. 
+ + Args: + rank: Global rank across all nodes + local_rank: Local rank within this node (used for GPU binding) + world_size: Total number of processes across all nodes + local_world_size: Number of processes on this node + test_func: Test function to run + return_dict: Shared dict for returning results + """ + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "29500" + + # Set device using local_rank (binds to GPU 0,1,2,3 on this node) + torch.cuda.set_device(local_rank) + + torch.distributed.init_process_group( + backend="nccl", + rank=rank, + world_size=world_size, + ) + + initialize_model_parallel(tensor_model_parallel_size=8) + + try: + result = test_func(rank, local_rank, world_size) + return_dict[rank] = result + except Exception as e: + import traceback + + return_dict[rank] = f"ERROR: {e}\n{traceback.format_exc()}" + finally: + torch.distributed.destroy_process_group() + + +def embedding_forward_tp8_worker_cuda(rank: int, local_rank: int, world_size: int): + """Test forward pass with real TP=8 on CUDA. 
+ + Args: + rank: Global rank + local_rank: Local rank within this node (for logging/debugging) + world_size: Total world size + """ + config = get_global_config() + + assert config.runtime.tp_size == 8, f"Rank {rank}: tp_size should be 8" + assert config.runtime.tp_rank == rank, f"Rank {rank}: tp_rank mismatch" + + vocab_size = 1024 + embed_dim = 64 + # .cuda() uses the device set by torch.cuda.set_device(local_rank) + layer = VocabParallelEmbedding(vocab_size, embed_dim).cuda() + + # Verify the layer is on the correct GPU + assert layer.weight.device.index == local_rank, ( + f"Rank {rank}: weight should be on GPU {local_rank}, got {layer.weight.device}" + ) + + expected_shard_size = vocab_size // 8 + assert layer.num_embeddings_per_partition == expected_shard_size + assert layer.weight.shape == (expected_shard_size, embed_dim) + + # Each rank initializes its own shard with known pattern + with torch.no_grad(): + layer.weight.fill_(float(rank + 1)) # Rank 0: 1.0, Rank 1: 2.0, ... + + # Create input on the correct GPU + input_ids = torch.tensor([[0, 128, 256, 384], [512, 640, 768, 896]], device="cuda") + + output = layer(input_ids) + assert output.shape == (2, 4, embed_dim) + + # Verify output is on correct GPU + assert output.device.index == local_rank, ( + f"Rank {rank}: output should be on GPU {local_rank}, got {output.device}" + ) + + if rank == 0: + # Each token is owned by exactly one TP rank. Since each rank fills its + # local shard with (rank + 1), post-all-reduce output must match below. 
+ expected_token_values = torch.tensor( + [[1, 2, 3, 4], [5, 6, 7, 8]], + device=output.device, + dtype=output.dtype, + ) + expected_output = expected_token_values.unsqueeze(-1).expand(-1, -1, embed_dim) + + if torch.equal(output, expected_output): + return "PASSED" + return "FAILED: embedding output does not match expected TP aggregation" + + return "OK" + + +def weight_loading_tp8_worker_cuda(rank: int, local_rank: int, world_size: int): + """Test weight loading with real TP=8 on CUDA. + + Args: + rank: Global rank + local_rank: Local rank within this node (for GPU binding verification) + world_size: Total world size + """ + vocab_size = 1024 + embed_dim = 64 + layer = VocabParallelEmbedding(vocab_size, embed_dim).cuda() + + # Verify the layer is on the correct GPU + assert layer.weight.device.index == local_rank, ( + f"Rank {rank}: weight should be on GPU {local_rank}, got {layer.weight.device}" + ) + + full_weight = torch.randn(vocab_size, embed_dim) + load_weight(layer.weight, full_weight.cuda()) + + shard_size = vocab_size // 8 + start_idx = rank * shard_size + end_idx = start_idx + shard_size + expected_shard = full_weight[start_idx:end_idx] + + if not torch.allclose(layer.weight.cpu(), expected_shard): + return f"FAILED: shard mismatch at rank {rank}" + + if rank == 0: + gathered_shards = [layer.weight.cpu().clone()] + for other_rank in range(1, 8): + other_shard = full_weight[ + other_rank * shard_size : (other_rank + 1) * shard_size + ] + gathered_shards.append(other_shard) + + reconstructed = torch.cat(gathered_shards, dim=0) + if torch.allclose(reconstructed, full_weight): + return "PASSED" + else: + return "FAILED: reconstruction mismatch" + + return "OK" + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") +@pytest.mark.skipif(torch.cuda.device_count() < 8, reason="Requires at least 8 GPUs") +class TestVocabParallelEmbeddingRealTP8: + """Real distributed tests with world_size=8 and TP=8 on CUDA.""" + + def 
test_forward_pass_tp8_real(self): + """Test forward pass with real TP=8 using 8 processes on CUDA.""" + world_size = 8 + local_world_size = 8 # Single node with 8 GPUs + + mp.set_start_method("spawn", force=True) + + manager = mp.Manager() + return_dict = manager.dict() + + processes = [] + for rank in range(world_size): + # In single-node setup, local_rank == rank + local_rank = rank + p = mp.Process( + target=run_worker_tp8_cuda, + args=( + rank, + local_rank, + world_size, + local_world_size, + embedding_forward_tp8_worker_cuda, + return_dict, + ), + ) + p.start() + processes.append(p) + + for p in processes: + p.join(timeout=120) + if p.is_alive(): + p.terminate() + p.join() + + for rank in range(world_size): + result = return_dict.get(rank, "TIMEOUT") + if rank == 0: + assert result == "PASSED", f"Rank {rank} failed: {result}" + else: + assert "ERROR" not in str(result), f"Rank {rank} error: {result}" + + def test_weight_loading_tp8_real(self): + """Test weight loading with real TP=8 using 8 processes on CUDA.""" + world_size = 8 + local_world_size = 8 # Single node with 8 GPUs + + mp.set_start_method("spawn", force=True) + + manager = mp.Manager() + return_dict = manager.dict() + + processes = [] + for rank in range(world_size): + # In single-node setup, local_rank == rank + local_rank = rank + p = mp.Process( + target=run_worker_tp8_cuda, + args=( + rank, + local_rank, + world_size, + local_world_size, + weight_loading_tp8_worker_cuda, + return_dict, + ), + ) + p.start() + processes.append(p) + + for p in processes: + p.join(timeout=120) + if p.is_alive(): + p.terminate() + p.join() + + for rank in range(world_size): + result = return_dict.get(rank, "TIMEOUT") + if rank == 0: + assert result == "PASSED", f"Rank {rank} failed: {result}" + else: + assert "ERROR" not in str(result), f"Rank {rank} error: {result}" + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") +class TestVocabParallelEmbeddingCUDA: + """Tests for 
non-parallel TP=1 mode on CUDA.""" + + @pytest.fixture(autouse=True) + def setup_config(self): + config = get_global_config() + config.runtime.tp_size = 1 + config.runtime.tp_rank = 0 + yield + config.runtime.tp_size = 1 + config.runtime.tp_rank = 0 + + def test_cuda_forward(self): + layer = VocabParallelEmbedding(1000, 512).cuda() + input_ids = torch.randint(0, 1000, (4, 32), device="cuda") + + output = layer(input_ids) + + assert output.device.type == "cuda" + assert output.shape == (4, 32, 512) + + def test_cuda_weight_loader(self): + layer = VocabParallelEmbedding(100, 64).cuda() + + cpu_weight = torch.randn(100, 64) + load_weight(layer.weight, cpu_weight.cuda()) + + assert torch.allclose(layer.weight.cpu(), cpu_weight) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/pyproject.toml b/pyproject.toml index 89d69947..160341ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies=[ cuda = ["tilelang", "flashinfer-python"] [project.scripts] +pymllm = "pymllm.__main__:main" mllm-convertor = "pymllm.mobile.utils.mllm_convertor:main" mllm-service = "pymllm.mobile.service.tools:cli_app" From 731ea71892fd6c9be64a0449d7d11f84948f03f1 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Thu, 19 Feb 2026 08:53:02 +0000 Subject: [PATCH 05/13] feat: enhance layer implementations and add new components - Updated `.codespellrc` to include 'flashinfer' in the ignore words list. - Introduced new files for `launch_server`, `prepare`, and various layer implementations including `LayerNorm`, `RMSNorm`, and `MLP`. - Added `ColumnParallelLinear` and `RowParallelLinear` classes for efficient linear operations in tensor parallelism. - Implemented rotary embedding functions in `rope.py` for enhanced model performance. - Created caching mechanisms in `param_disk_cache.py` and `radix_cache.py` for improved memory management. - Refactored `GroupCoordinator` to enhance broadcasting functionality in distributed settings. 
--- .codespellrc | 2 +- pymllm/executor/__init__.py | 0 pymllm/executor/cuda_graph_runner.py | 0 pymllm/launch_server.py | 0 pymllm/layers/__init__.py | 24 ++ pymllm/layers/base.py | 3 +- pymllm/layers/embedding.py | 10 +- pymllm/layers/layer_norm.py | 43 ++++ pymllm/layers/linear.py | 263 +++++++++++++++++++++ pymllm/layers/mlp.py | 199 ++++++++++++++++ pymllm/layers/rms_norm.py | 64 ++++++ pymllm/layers/rope.py | 276 +++++++++++++++++++++++ pymllm/mem_cache/param_disk_cache.py | 0 pymllm/mem_cache/radix_cache.py | 0 pymllm/orchestrator/group_coordinator.py | 12 +- pymllm/prepare.py | 0 16 files changed, 890 insertions(+), 6 deletions(-) create mode 100644 pymllm/executor/__init__.py create mode 100644 pymllm/executor/cuda_graph_runner.py create mode 100644 pymllm/launch_server.py create mode 100644 pymllm/layers/layer_norm.py create mode 100644 pymllm/layers/linear.py create mode 100644 pymllm/layers/rope.py create mode 100644 pymllm/mem_cache/param_disk_cache.py create mode 100644 pymllm/mem_cache/radix_cache.py create mode 100644 pymllm/prepare.py diff --git a/.codespellrc b/.codespellrc index 9ddb9d85..bbf02bd1 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,3 +1,3 @@ [codespell] -ignore-words-list = ans, als, hel, boostrap, childs, te, vas, hsa, ment, cann, thi, makro, wil, rouge, PRIS, bfloat, constexpr, cuda, dlpack, expt, forceinline, ifndef, linalg, LPBQ, mllm, pymllm, Quantizaton, Qwen, ROCM, silu, torchao +ignore-words-list = ans, als, hel, boostrap, childs, te, vas, hsa, ment, cann, thi, makro, wil, rouge, PRIS, bfloat, constexpr, cuda, dlpack, expt, forceinline, ifndef, linalg, LPBQ, mllm, pymllm, Quantizaton, Qwen, ROCM, silu, torchao, flashinfer skip = *.json,*.jsonl,*.patch,*.txt diff --git a/pymllm/executor/__init__.py b/pymllm/executor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/executor/cuda_graph_runner.py b/pymllm/executor/cuda_graph_runner.py new file mode 100644 index 00000000..e69de29b diff --git 
a/pymllm/launch_server.py b/pymllm/launch_server.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/__init__.py b/pymllm/layers/__init__.py index 6f70a4d1..fd9a070e 100644 --- a/pymllm/layers/__init__.py +++ b/pymllm/layers/__init__.py @@ -2,10 +2,34 @@ from pymllm.layers.base import MllmBaseLayer from pymllm.layers.embedding import VocabParallelEmbedding +from pymllm.layers.layer_norm import LayerNorm +from pymllm.layers.linear import ColumnParallelLinear, Linear, RowParallelLinear +from pymllm.layers.mlp import MLP, ParallelMLP +from pymllm.layers.rms_norm import GemmaRMSNorm, RMSNorm +from pymllm.layers.rope import ( + apply_llama31_rope, + apply_llama31_rope_pos_ids, + apply_rope, + apply_rope_pos_ids, + apply_rope_with_cos_sin_cache, +) from pymllm.layers.utils import set_weight_attrs __all__ = [ "MllmBaseLayer", "set_weight_attrs", "VocabParallelEmbedding", + "ColumnParallelLinear", + "Linear", + "RowParallelLinear", + "MLP", + "ParallelMLP", + "LayerNorm", + "RMSNorm", + "GemmaRMSNorm", + "apply_rope", + "apply_llama31_rope", + "apply_rope_pos_ids", + "apply_llama31_rope_pos_ids", + "apply_rope_with_cos_sin_cache", ] diff --git a/pymllm/layers/base.py b/pymllm/layers/base.py index 5dc519f4..3044e206 100644 --- a/pymllm/layers/base.py +++ b/pymllm/layers/base.py @@ -3,12 +3,13 @@ from torch.nn import Parameter from pymllm.layers.utils import set_weight_attrs from pymllm.quantization.quant_recipe import QuantRecipe +from typing import Optional class MllmBaseLayer(nn.Module): def __init__(self): super().__init__() - self.quant_recipe: QuantRecipe = None + self.quant_recipe: Optional[QuantRecipe] = None def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): """Load weights into a parameter. 
diff --git a/pymllm/layers/embedding.py b/pymllm/layers/embedding.py index 0442caa4..ec99c5b2 100644 --- a/pymllm/layers/embedding.py +++ b/pymllm/layers/embedding.py @@ -120,6 +120,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: Returns: Embedded representation (all-reduced across TP group if needed). """ + local_padding_idx = self.padding_idx if self.tp_size > 1: # Create mask for valid vocab range vocab_mask = (x >= self.vocab_start_index) & (x < self.vocab_end_index) @@ -130,6 +131,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: x - self.vocab_start_index, torch.zeros_like(x), # Invalid indices become 0 (will be masked) ) + # F.embedding expects indices in local weight-table space. + # Only pass padding_idx on the owning rank, remapped to local offset. + if self.padding_idx is not None: + if self.vocab_start_index <= self.padding_idx < self.vocab_end_index: + local_padding_idx = self.padding_idx - self.vocab_start_index + else: + local_padding_idx = None else: masked_input = x vocab_mask = None @@ -138,7 +146,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: output = F.embedding( masked_input.long(), self.weight, - padding_idx=self.padding_idx if self.padding_idx is not None else None, + padding_idx=local_padding_idx, ) # Mask invalid positions (for TP) diff --git a/pymllm/layers/layer_norm.py b/pymllm/layers/layer_norm.py new file mode 100644 index 00000000..54d94c19 --- /dev/null +++ b/pymllm/layers/layer_norm.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import torch +import flashinfer +from torch.nn import Parameter + +from pymllm.layers.base import MllmBaseLayer +from pymllm.layers.utils import set_weight_attrs + + +class LayerNorm(MllmBaseLayer): + """LayerNorm layer implemented with FlashInfer kernel.""" + + def __init__(self, hidden_size: int, eps: float = 1e-6): + super().__init__() + self.hidden_size = hidden_size + self.eps = eps + + # flashinfer.norm.layernorm expects gamma/beta in fp32. 
+ self.weight = Parameter(torch.ones(hidden_size, dtype=torch.float32)) + self.bias = Parameter(torch.zeros(hidden_size, dtype=torch.float32)) + set_weight_attrs(self.weight, {"weight_loader": self.weight_loader}) + set_weight_attrs(self.bias, {"weight_loader": self.weight_loader}) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.shape[-1] != self.hidden_size: + raise ValueError( + f"Expected last dim == hidden_size ({self.hidden_size}), " + f"but got input shape {tuple(x.shape)}" + ) + if x.dtype != torch.bfloat16: + raise TypeError( + "flashinfer.norm.layernorm requires bfloat16 input, " + f"but got {x.dtype}" + ) + + if x.dim() == 2: + return flashinfer.norm.layernorm(x, self.weight, self.bias, self.eps) + + original_shape = x.shape + x_2d = x.reshape(-1, self.hidden_size) + out = flashinfer.norm.layernorm(x_2d, self.weight, self.bias, self.eps) + return out.reshape(original_shape) diff --git a/pymllm/layers/linear.py b/pymllm/layers/linear.py new file mode 100644 index 00000000..dc583e93 --- /dev/null +++ b/pymllm/layers/linear.py @@ -0,0 +1,263 @@ +from __future__ import annotations + +import torch +import torch.nn.functional as F +from torch.nn import Parameter + +from pymllm.layers.base import MllmBaseLayer +from pymllm.layers.utils import set_weight_attrs +from pymllm.orchestrator import ( + divide, + get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, + tensor_model_parallel_all_gather, + tensor_model_parallel_all_reduce, +) + + +class ColumnParallelLinear(MllmBaseLayer): + """Linear layer with column parallelism (output-dimension sharding). + + The weight matrix is split along the output dimension across TP ranks. + Each rank holds ``out_features / tp_size`` rows of the weight. + + Args: + in_features: Size of each input sample. + out_features: Size of each output sample (before sharding). + bias: If ``True``, adds a learnable bias. 
+ gather_output: If ``True``, all-gather the output across TP ranks + so every rank gets the full ``out_features``. Set to ``False`` + when the next layer is a :class:`RowParallelLinear` that expects + a split input. + """ + + def __init__( + self, + in_features: int, + out_features: int, + bias: bool = True, + gather_output: bool = True, + ): + super().__init__() + + self.tp_rank = get_tensor_model_parallel_rank() + self.tp_size = get_tensor_model_parallel_world_size() + + self.in_features = in_features + self.out_features = out_features + self.gather_output = gather_output + + if out_features % self.tp_size != 0: + raise ValueError( + f"out_features ({out_features}) must be divisible by " + f"tp_size ({self.tp_size})" + ) + self.out_features_per_partition = divide(out_features, self.tp_size) + + self.output_start_index = self.tp_rank * self.out_features_per_partition + self.output_end_index = self.output_start_index + self.out_features_per_partition + + self.weight = Parameter( + torch.empty(self.out_features_per_partition, in_features) + ) + set_weight_attrs( + self.weight, + { + "output_dim": 0, + "input_dim": 1, + "weight_loader": self.weight_loader, + }, + ) + + if bias: + self.bias_flag = True + self.bias = Parameter(torch.empty(self.out_features_per_partition)) + set_weight_attrs( + self.bias, + { + "output_dim": 0, + "weight_loader": self.weight_loader, + }, + ) + else: + self.bias_flag = False + self.register_parameter("bias", None) + + def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): + """Load sharded weights into the parameter. + + Args: + param: The parameter to load weights into. + loaded_weight: The weight tensor loaded from checkpoint (full size). 
+ """ + output_dim = getattr(param, "output_dim", None) + + if output_dim is None or self.tp_size == 1: + assert param.data.shape == loaded_weight.shape, ( + f"Shape mismatch: param {param.data.shape} vs " + f"loaded {loaded_weight.shape}" + ) + param.data.copy_(loaded_weight) + else: + shard_weight = loaded_weight.narrow( + output_dim, + self.output_start_index, + self.out_features_per_partition, + ) + assert param.data.shape == shard_weight.shape, ( + f"Shard shape mismatch: param {param.data.shape} vs " + f"shard {shard_weight.shape}" + ) + param.data.copy_(shard_weight) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output = F.linear(x, self.weight, self.bias) + + if self.gather_output and self.tp_size > 1: + output = tensor_model_parallel_all_gather(output, dim=-1) + + return output + + +class RowParallelLinear(MllmBaseLayer): + """Linear layer with row parallelism (input-dimension sharding). + + The weight matrix is split along the input dimension across TP ranks. + Each rank holds all ``out_features`` rows but only + ``in_features / tp_size`` columns. + + Typically placed after a :class:`ColumnParallelLinear` whose + ``gather_output=False``, so the input is already split. + + Args: + in_features: Size of each input sample (before sharding). + out_features: Size of each output sample. + bias: If ``True``, adds a learnable bias (applied after all-reduce). + reduce_output: If ``True``, all-reduce the output across TP ranks. 
+ """ + + def __init__( + self, + in_features: int, + out_features: int, + bias: bool = True, + reduce_output: bool = True, + ): + super().__init__() + + self.tp_rank = get_tensor_model_parallel_rank() + self.tp_size = get_tensor_model_parallel_world_size() + + self.in_features = in_features + self.out_features = out_features + self.reduce_output = reduce_output + + if in_features % self.tp_size != 0: + raise ValueError( + f"in_features ({in_features}) must be divisible by " + f"tp_size ({self.tp_size})" + ) + self.in_features_per_partition = divide(in_features, self.tp_size) + + self.input_start_index = self.tp_rank * self.in_features_per_partition + self.input_end_index = self.input_start_index + self.in_features_per_partition + + self.weight = Parameter( + torch.empty(out_features, self.in_features_per_partition) + ) + set_weight_attrs( + self.weight, + { + "output_dim": 0, + "input_dim": 1, + "weight_loader": self.weight_loader, + }, + ) + + if bias: + self.bias_flag = True + self.bias = Parameter(torch.empty(out_features)) + set_weight_attrs(self.bias, {"weight_loader": self.weight_loader}) + else: + self.bias_flag = False + self.register_parameter("bias", None) + + def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): + """Load sharded weights into the parameter. + + Args: + param: The parameter to load weights into. + loaded_weight: The weight tensor loaded from checkpoint (full size). 
+ """ + input_dim = getattr(param, "input_dim", None) + + if input_dim is None or self.tp_size == 1: + assert param.data.shape == loaded_weight.shape, ( + f"Shape mismatch: param {param.data.shape} vs " + f"loaded {loaded_weight.shape}" + ) + param.data.copy_(loaded_weight) + else: + shard_weight = loaded_weight.narrow( + input_dim, + self.input_start_index, + self.in_features_per_partition, + ) + assert param.data.shape == shard_weight.shape, ( + f"Shard shape mismatch: param {param.data.shape} vs " + f"shard {shard_weight.shape}" + ) + param.data.copy_(shard_weight) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output = F.linear(x, self.weight) + + if self.reduce_output and self.tp_size > 1: + output = tensor_model_parallel_all_reduce(output) + + if self.bias is not None: + output = output + self.bias + + return output + + +class Linear(MllmBaseLayer): + """Linear layer with simple quant dispatch.""" + + def __init__( + self, + in_features: int, + out_features: int, + bias: bool = True, + ): + super().__init__() + self.in_features = in_features + self.out_features = out_features + + self.weight = Parameter(torch.empty(out_features, in_features)) + set_weight_attrs( + self.weight, + { + "output_dim": 0, + "input_dim": 1, + "weight_loader": self.weight_loader, + }, + ) + + if bias: + self.bias = Parameter(torch.empty(out_features)) + set_weight_attrs(self.bias, {"weight_loader": self.weight_loader}) + else: + self.register_parameter("bias", None) + + def _forward_torch_linear(self, x: torch.Tensor) -> torch.Tensor: + return F.linear(x, self.weight, self.bias) + + def _forward_quant_linear(self, x: torch.Tensor) -> torch.Tensor: + # TODO(wch): Implement quantized linear path. 
+ raise NotImplementedError("quant_linear is not implemented yet.") + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.quant_recipe is None: + return self._forward_torch_linear(x) + return self._forward_quant_linear(x) diff --git a/pymllm/layers/mlp.py b/pymllm/layers/mlp.py index e69de29b..1a40db92 100644 --- a/pymllm/layers/mlp.py +++ b/pymllm/layers/mlp.py @@ -0,0 +1,199 @@ +from __future__ import annotations + +import logging +from typing import Callable, Literal, Optional + +import flashinfer +import torch + +from pymllm.layers.base import MllmBaseLayer +from pymllm.layers.linear import ColumnParallelLinear, Linear, RowParallelLinear + +logger = logging.getLogger(__name__) + +MLPActivation = Literal["silu", "gelu", "gelu_tanh"] + +_ACTIVATION_MAP: dict[MLPActivation, Callable[..., torch.Tensor]] = { + "silu": flashinfer.activation.silu_and_mul, + "gelu": flashinfer.activation.gelu_and_mul, + "gelu_tanh": flashinfer.activation.gelu_tanh_and_mul, +} + + +def _validate_mlp_args( + hidden_size: int, intermediate_size: int, activation: str +) -> None: + if hidden_size <= 0: + raise ValueError(f"hidden_size must be > 0, but got {hidden_size}") + if intermediate_size <= 0: + raise ValueError( + f"intermediate_size must be > 0, but got {intermediate_size}" + ) + if activation not in _ACTIVATION_MAP: + raise ValueError( + f"Unsupported activation '{activation}'. " + f"Expected one of: {list(_ACTIVATION_MAP)}" + ) + + +def _run_gated_activation( + gate_up: torch.Tensor, + intermediate_size: int, + activation: MLPActivation, + enable_pdl: Optional[bool], +) -> torch.Tensor: + if gate_up.shape[-1] != 2 * intermediate_size: + raise ValueError( + "Expected last dim of gate_up tensor to be " + f"{2 * intermediate_size}, but got {gate_up.shape[-1]}" + ) + return _ACTIVATION_MAP[activation](gate_up, enable_pdl=enable_pdl) + + +class MLP(MllmBaseLayer): + """Feed-forward MLP block with FlashInfer fused gated activations. + + Non-parallel version (TP=1). 
Uses :class:`Linear` for all projections. + + Supported activations: ``silu``, ``gelu``, ``gelu_tanh``. + """ + + def __init__( + self, + hidden_size: int, + intermediate_size: int, + activation: MLPActivation = "silu", + use_fused_gate_up_proj: bool = True, + use_bias_gate_up: bool = False, + use_bias_down: bool = False, + enable_pdl: Optional[bool] = None, + ): + super().__init__() + _validate_mlp_args(hidden_size, intermediate_size, activation) + + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.activation = activation + self.use_fused_gate_up_proj = use_fused_gate_up_proj + self.enable_pdl = enable_pdl + + if not use_fused_gate_up_proj: + logger.warning( + "MLP with use_fused_gate_up_proj=False uses a lower-efficiency path. " + "Use use_fused_gate_up_proj=True for better performance.", + ) + + if use_fused_gate_up_proj: + self.gate_up_proj = Linear( + hidden_size, 2 * intermediate_size, bias=use_bias_gate_up, + ) + self.gate_proj = None + self.up_proj = None + else: + self.gate_up_proj = None + self.gate_proj = Linear( + hidden_size, intermediate_size, bias=use_bias_gate_up, + ) + self.up_proj = Linear( + hidden_size, intermediate_size, bias=use_bias_gate_up, + ) + + self.down_proj = Linear( + intermediate_size, hidden_size, bias=use_bias_down, + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.shape[-1] != self.hidden_size: + raise ValueError( + f"Expected last dim == hidden_size ({self.hidden_size}), " + f"but got input shape {tuple(x.shape)}" + ) + + if self.use_fused_gate_up_proj: + assert self.gate_up_proj is not None + gate_up = self.gate_up_proj(x) + else: + assert self.gate_proj is not None and self.up_proj is not None + gate_up = torch.cat([self.gate_proj(x), self.up_proj(x)], dim=-1) + + hidden = _run_gated_activation( + gate_up, self.intermediate_size, self.activation, self.enable_pdl, + ) + return self.down_proj(hidden) + + +class ParallelMLP(MllmBaseLayer): + """Tensor-parallel MLP with 
column-sharded intermediate dimension. + + Projection layout (Megatron-style): + + - ``gate_proj``: :class:`ColumnParallelLinear` + ``(hidden_size → intermediate_size, gather_output=False)`` + - ``up_proj``: :class:`ColumnParallelLinear` + ``(hidden_size → intermediate_size, gather_output=False)`` + - ``down_proj``: :class:`RowParallelLinear` + ``(intermediate_size → hidden_size, reduce_output=True)`` + + Gate and up projections are kept separate so that each TP rank holds a + correctly paired ``[gate_shard, up_shard]`` for the gated activation. + + Cost: **1 all-reduce** (inside ``down_proj``). + + Input shape : ``(*, hidden_size)`` — full / replicated. + Output shape: ``(*, hidden_size)`` — full / replicated. + + Args: + hidden_size: Model hidden dimension. + intermediate_size: Intermediate (expanded) dimension **before** TP + sharding. + activation: Gated activation type. + use_bias_gate_up: Add bias to the gate/up projections. + use_bias_down: Add bias to the down projection. + enable_pdl: FlashInfer PDL flag. 
+ """ + + def __init__( + self, + hidden_size: int, + intermediate_size: int, + activation: MLPActivation = "silu", + use_bias_gate_up: bool = False, + use_bias_down: bool = False, + enable_pdl: Optional[bool] = None, + ): + super().__init__() + _validate_mlp_args(hidden_size, intermediate_size, activation) + + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.activation = activation + self.enable_pdl = enable_pdl + + self.gate_proj = ColumnParallelLinear( + hidden_size, intermediate_size, + bias=use_bias_gate_up, gather_output=False, + ) + self.up_proj = ColumnParallelLinear( + hidden_size, intermediate_size, + bias=use_bias_gate_up, gather_output=False, + ) + + self.down_proj = RowParallelLinear( + intermediate_size, hidden_size, + bias=use_bias_down, reduce_output=True, + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.shape[-1] != self.hidden_size: + raise ValueError( + f"Expected last dim == hidden_size ({self.hidden_size}), " + f"but got input shape {tuple(x.shape)}" + ) + + gate_up = torch.cat([self.gate_proj(x), self.up_proj(x)], dim=-1) + + shard_inter = self.down_proj.in_features_per_partition + hidden = _run_gated_activation( + gate_up, shard_inter, self.activation, self.enable_pdl, + ) + return self.down_proj(hidden) diff --git a/pymllm/layers/rms_norm.py b/pymllm/layers/rms_norm.py index e69de29b..b55a0ea6 100644 --- a/pymllm/layers/rms_norm.py +++ b/pymllm/layers/rms_norm.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import torch +import flashinfer +from torch.nn import Parameter + +from pymllm.layers.base import MllmBaseLayer +from pymllm.layers.utils import set_weight_attrs + + +class RMSNorm(MllmBaseLayer): + """RMSNorm layer implemented with FlashInfer kernel.""" + + def __init__(self, hidden_size: int, eps: float = 1e-6): + super().__init__() + self.hidden_size = hidden_size + self.eps = eps + + self.weight = Parameter(torch.empty(hidden_size)) + set_weight_attrs(self.weight, 
{"weight_loader": self.weight_loader}) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.shape[-1] != self.hidden_size: + raise ValueError( + f"Expected last dim == hidden_size ({self.hidden_size}), " + f"but got input shape {tuple(x.shape)}" + ) + + # FlashInfer rmsnorm accepts 2D/3D input; flatten higher-rank tensors to 2D. + if x.dim() in (2, 3): + return flashinfer.norm.rmsnorm(x, self.weight, self.eps) + + original_shape = x.shape + x_2d = x.reshape(-1, self.hidden_size) + out = flashinfer.norm.rmsnorm(x_2d, self.weight, self.eps) + return out.reshape(original_shape) + + +class GemmaRMSNorm(MllmBaseLayer): + """Gemma-style RMSNorm layer implemented with FlashInfer kernel.""" + + def __init__(self, hidden_size: int, eps: float = 1e-6): + super().__init__() + self.hidden_size = hidden_size + self.eps = eps + + self.weight = Parameter(torch.empty(hidden_size)) + set_weight_attrs(self.weight, {"weight_loader": self.weight_loader}) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.shape[-1] != self.hidden_size: + raise ValueError( + f"Expected last dim == hidden_size ({self.hidden_size}), " + f"but got input shape {tuple(x.shape)}" + ) + + # gemma_rmsnorm is defined on 2D input; flatten other ranks to 2D. 
+ if x.dim() == 2: + return flashinfer.norm.gemma_rmsnorm(x, self.weight, self.eps) + + original_shape = x.shape + x_2d = x.reshape(-1, self.hidden_size) + out = flashinfer.norm.gemma_rmsnorm(x_2d, self.weight, self.eps) + return out.reshape(original_shape) diff --git a/pymllm/layers/rope.py b/pymllm/layers/rope.py new file mode 100644 index 00000000..045774e9 --- /dev/null +++ b/pymllm/layers/rope.py @@ -0,0 +1,276 @@ +from __future__ import annotations + +from typing import Optional, Tuple + +import torch +import flashinfer + + +def apply_rope( + q: torch.Tensor, + k: torch.Tensor, + indptr: torch.Tensor, + offsets: torch.Tensor, + inplace: bool = False, + rotary_dim: Optional[int] = None, + interleave: bool = False, + rope_scale: float = 1.0, + rope_theta: float = 1e4, +) -> Optional[Tuple[torch.Tensor, torch.Tensor]]: + """Apply rotary embedding to a batch of queries/keys (stored as RaggedTensor). + + cos/sin values are computed on the fly inside the kernel. Position offsets + are provided per-segment via ``indptr`` and ``offsets``. + + Args: + q: Query ragged tensor, shape ``(nnz, num_q_heads, head_dim)``. + k: Key ragged tensor, shape ``(nnz, num_k_heads, head_dim)``. + indptr: Indptr tensor, shape ``(batch_size + 1,)``. The i-th segment + spans ``q[indptr[i]:indptr[i+1]]``. + offsets: Relative position offsets per segment, shape ``(batch_size,)``. + inplace: If ``True``, apply RoPE in-place and return ``None``. + If ``False``, return new ``(q_rope, k_rope)`` tensors. + rotary_dim: Number of dimensions to apply RoPE to. ``None`` means + the entire ``head_dim``. + interleave: If ``True``, rotate even/odd dims (``[..., ::2]`` / + ``[..., 1::2]``). If ``False``, rotate first/second half dims. + rope_scale: Scaling factor for position indices. + rope_theta: Base frequency theta. + + Returns: + ``None`` when *inplace* is ``True``, otherwise a tuple + ``(q_rope, k_rope)`` of rotated tensors with the same shapes as + the inputs. 
+ """ + if inplace: + flashinfer.rope.apply_rope_inplace( + q, k, indptr, offsets, + rotary_dim=rotary_dim, + interleave=interleave, + rope_scale=rope_scale, + rope_theta=rope_theta, + ) + return None + + return flashinfer.rope.apply_rope( + q, k, indptr, offsets, + rotary_dim=rotary_dim, + interleave=interleave, + rope_scale=rope_scale, + rope_theta=rope_theta, + ) + + +def apply_llama31_rope( + q: torch.Tensor, + k: torch.Tensor, + indptr: torch.Tensor, + offsets: torch.Tensor, + inplace: bool = False, + rotary_dim: Optional[int] = None, + interleave: bool = False, + rope_scale: float = 8.0, + rope_theta: float = 5e5, + low_freq_factor: float = 1.0, + high_freq_factor: float = 4.0, + old_context_len: int = 8192, +) -> Optional[Tuple[torch.Tensor, torch.Tensor]]: + """Apply Llama 3.1 style rotary embedding to a batch of queries/keys. + + This variant adjusts frequencies with ``low_freq_factor``, + ``high_freq_factor``, and ``old_context_len`` following the Llama 3.1 + RoPE recipe. cos/sin values are computed on the fly. + + Args: + q: Query ragged tensor, shape ``(nnz, num_q_heads, head_dim)``. + k: Key ragged tensor, shape ``(nnz, num_k_heads, head_dim)``. + indptr: Indptr tensor, shape ``(batch_size + 1,)``. + offsets: Relative position offsets per segment, shape ``(batch_size,)``. + inplace: If ``True``, apply in-place and return ``None``. + rotary_dim: Number of dimensions to apply RoPE to. ``None`` means + the entire ``head_dim``. + interleave: If ``True``, rotate even/odd dims; otherwise first/second + half dims. + rope_scale: Scaling factor for position indices (default ``8``). + rope_theta: Base frequency theta (default ``5e5``). + low_freq_factor: Low frequency factor for Llama 3.1 RoPE. + high_freq_factor: High frequency factor for Llama 3.1 RoPE. + old_context_len: Original context length for Llama 3.1 RoPE. + + Returns: + ``None`` when *inplace* is ``True``, otherwise ``(q_rope, k_rope)``. 
+ """ + if inplace: + flashinfer.rope.apply_llama31_rope_inplace( + q, k, indptr, offsets, + rotary_dim=rotary_dim, + interleave=interleave, + rope_scale=rope_scale, + rope_theta=rope_theta, + low_freq_factor=low_freq_factor, + high_freq_factor=high_freq_factor, + old_context_len=old_context_len, + ) + return None + + return flashinfer.rope.apply_llama31_rope( + q, k, indptr, offsets, + rotary_dim=rotary_dim, + interleave=interleave, + rope_scale=rope_scale, + rope_theta=rope_theta, + low_freq_factor=low_freq_factor, + high_freq_factor=high_freq_factor, + old_context_len=old_context_len, + ) + + +def apply_rope_pos_ids( + q: torch.Tensor, + k: torch.Tensor, + pos_ids: torch.Tensor, + inplace: bool = False, + rotary_dim: Optional[int] = None, + interleave: bool = False, + rope_scale: float = 1.0, + rope_theta: float = 1e4, +) -> Optional[Tuple[torch.Tensor, torch.Tensor]]: + """Apply rotary embedding using explicit per-token position IDs. + + Unlike :func:`apply_rope` which derives positions from ``indptr`` / + ``offsets``, this function takes a flat ``pos_ids`` tensor that supplies + an explicit position for every token. + + Args: + q: Query tensor, shape ``(nnz, num_q_heads, head_dim)``. + k: Key tensor, shape ``(nnz, num_k_heads, head_dim)``. + pos_ids: Position indices, shape ``(nnz,)``. + inplace: If ``True``, apply in-place and return ``None``. + rotary_dim: Number of dimensions to apply RoPE to. + interleave: Interleaved layout flag. + rope_scale: Scaling factor for position indices. + rope_theta: Base frequency theta. + + Returns: + ``None`` when *inplace* is ``True``, otherwise ``(q_rope, k_rope)``. 
+ """ + if inplace: + flashinfer.rope.apply_rope_pos_ids_inplace( + q, k, pos_ids, + rotary_dim=rotary_dim, + interleave=interleave, + rope_scale=rope_scale, + rope_theta=rope_theta, + ) + return None + + return flashinfer.rope.apply_rope_pos_ids( + q, k, pos_ids, + rotary_dim=rotary_dim, + interleave=interleave, + rope_scale=rope_scale, + rope_theta=rope_theta, + ) + + +def apply_llama31_rope_pos_ids( + q: torch.Tensor, + k: torch.Tensor, + pos_ids: torch.Tensor, + inplace: bool = False, + rotary_dim: Optional[int] = None, + interleave: bool = False, + rope_scale: float = 8.0, + rope_theta: float = 5e5, + low_freq_factor: float = 1.0, + high_freq_factor: float = 4.0, + old_context_len: int = 8192, +) -> Optional[Tuple[torch.Tensor, torch.Tensor]]: + """Apply Llama 3.1 style RoPE using explicit per-token position IDs. + + Combines Llama 3.1 frequency adjustments with explicit ``pos_ids``. + + Args: + q: Query tensor, shape ``(nnz, num_q_heads, head_dim)``. + k: Key tensor, shape ``(nnz, num_k_heads, head_dim)``. + pos_ids: Position indices, shape ``(nnz,)``. + inplace: If ``True``, apply in-place and return ``None``. + rotary_dim: Number of dimensions to apply RoPE to. + interleave: Interleaved layout flag. + rope_scale: Scaling factor (default ``8``). + rope_theta: Base frequency theta (default ``5e5``). + low_freq_factor: Low frequency factor for Llama 3.1 RoPE. + high_freq_factor: High frequency factor for Llama 3.1 RoPE. + old_context_len: Original context length for Llama 3.1 RoPE. + + Returns: + ``None`` when *inplace* is ``True``, otherwise ``(q_rope, k_rope)``. 
+ """ + if inplace: + flashinfer.rope.apply_llama31_rope_pos_ids_inplace( + q, k, pos_ids, + rotary_dim=rotary_dim, + interleave=interleave, + rope_scale=rope_scale, + rope_theta=rope_theta, + low_freq_factor=low_freq_factor, + high_freq_factor=high_freq_factor, + old_context_len=old_context_len, + ) + return None + + return flashinfer.rope.apply_llama31_rope_pos_ids( + q, k, pos_ids, + rotary_dim=rotary_dim, + interleave=interleave, + rope_scale=rope_scale, + rope_theta=rope_theta, + low_freq_factor=low_freq_factor, + high_freq_factor=high_freq_factor, + old_context_len=old_context_len, + ) + + +def apply_rope_with_cos_sin_cache( + positions: torch.Tensor, + query: torch.Tensor, + key: torch.Tensor, + head_size: int, + cos_sin_cache: torch.Tensor, + inplace: bool = False, + is_neox: bool = True, +) -> Optional[Tuple[torch.Tensor, torch.Tensor]]: + """Apply rotary embedding with precomputed cos/sin cache. + + Compatible with SGL/vLLM implementations. Note that ``query`` and ``key`` + use a **flattened** head layout ``(nnz, num_heads * head_size)`` instead + of the 3-D layout used by the other ``apply_rope*`` functions. + + Args: + positions: Position indices, shape ``(nnz,)``. + query: Query tensor, shape ``(nnz, num_q_heads * head_size)``. + key: Key tensor, shape ``(nnz, num_k_heads * head_size)``. + head_size: Size of each attention head. + cos_sin_cache: Precomputed cos/sin tensor, shape + ``(max_seq_len, rotary_dim)``. The first half of ``rotary_dim`` + stores cosine values, the second half stores sine values. + inplace: If ``True``, apply in-place and return ``None``. + is_neox: If ``True`` (default), use GPT-NeoX style (rotate + first/second half dims). If ``False``, use interleaved style + (rotate even/odd dims). + + Returns: + ``None`` when *inplace* is ``True``, otherwise + ``(query_out, key_out)`` with the same shapes as the inputs. 
+ """ + if inplace: + flashinfer.rope.apply_rope_with_cos_sin_cache_inplace( + positions, query, key, head_size, cos_sin_cache, + is_neox=is_neox, + ) + return None + + return flashinfer.rope.apply_rope_with_cos_sin_cache( + positions, query, key, head_size, cos_sin_cache, + is_neox=is_neox, + ) diff --git a/pymllm/mem_cache/param_disk_cache.py b/pymllm/mem_cache/param_disk_cache.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/mem_cache/radix_cache.py b/pymllm/mem_cache/radix_cache.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/orchestrator/group_coordinator.py b/pymllm/orchestrator/group_coordinator.py index d0624473..2fec3078 100644 --- a/pymllm/orchestrator/group_coordinator.py +++ b/pymllm/orchestrator/group_coordinator.py @@ -1,6 +1,6 @@ """GroupCoordinator for distributed communication.""" -from typing import List, Optional +from typing import List import torch import torch.distributed as dist @@ -63,9 +63,15 @@ def all_gather(self, tensor: torch.Tensor, dim: int = 0) -> torch.Tensor: return torch.cat(tensor_list, dim=dim) def broadcast(self, tensor: torch.Tensor, src: int = 0) -> torch.Tensor: - """Broadcast from source rank to all.""" + """Broadcast from source rank to all. + + Args: + tensor: Tensor to broadcast. + src: Source rank relative to this group (0 <= src < world_size). + """ if self.device_group is not None: - dist.broadcast(tensor, src=src, group=self.device_group) + global_src = self.ranks[src] + dist.broadcast(tensor, src=global_src, group=self.device_group) return tensor diff --git a/pymllm/prepare.py b/pymllm/prepare.py new file mode 100644 index 00000000..e69de29b From 02255d8cea835927f7b0307d7218a7312cd43dc7 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Thu, 19 Feb 2026 09:28:47 +0000 Subject: [PATCH 06/13] feat: add initial files for pymllm architecture and launch functionality - Introduced a new architecture diagram image `pymllm-arch.png` in the assets directory. 
- Updated `README.md` to include the architecture diagram. - Created initial `launch.py` files in both the engine and server directories for future functionality. - Added an empty `scheduler.py` file in the orchestrator directory to support scheduling features. --- assets/pymllm-arch.png | Bin 0 -> 388499 bytes pymllm/README.md | 3 +++ pymllm/engine/launch.py | 1 + .../scheduler.py} | 0 pymllm/{prepare.py => server/launch.py} | 0 5 files changed, 4 insertions(+) create mode 100644 assets/pymllm-arch.png create mode 100644 pymllm/engine/launch.py rename pymllm/{launch_server.py => orchestrator/scheduler.py} (100%) rename pymllm/{prepare.py => server/launch.py} (100%) diff --git a/assets/pymllm-arch.png b/assets/pymllm-arch.png new file mode 100644 index 0000000000000000000000000000000000000000..37c48b2a087b35d0693646566dc9870c50786b6f GIT binary patch literal 388499 zcma%EcOaGT7mw&mv?#P_Tx5h2vTuv7-9WaocXqCM+e_-&WR$&SW^2lpy;b(k-u#~D za=H9k|9rpK?Y{5(JZGQJInQ|?$;pUs+q!S-rcIl+p)Q`kylE48`KC=|Dr8&0H!rJS z^=#U-a}(|g{y5c7vvrSnz;1Insz*QO zE?f$|Lgjhnv8uqXdygL<@UCJ>pYGdz7Daou)8g`ri=laS(F>_li)AH&4feXjzC5x! 
z`UR6~XJgA}vxAJ{l#FV#r}Tt}%odL#T{aOBlaTE^wEUruXv(O|~4WiH=(f%N=Eg5_fbC@u!ZjMeT@VJ0r3TGy!)%gY9dJx z<=ej8uaqL#_v{s941Sd>7hz^89zMq}AUKd1+ufI;cHx4{CQRYhYtQiiu=3FVr}-v$ zed0E_twStl*R5@VJ=o+|D_i+dPcGQjkbm?G+%u%4^DF=GV?2dR-XhE0yLE-mZo_|r zmjE<}Ch2c)(8qMhF5Xn-s?hQ;kbET7f2AW=2cX6M8~^P@{F;~R_KIh@)fMEZDj!b` zpx^RU6~aV3yEd#9|6}~pb>)zw&6Gb>CT@~H^!hi7KL>Ot0}@!Cwwru)c^FfQNVh*; zPZW&yWrZY>_3y9ex(gX~Glaqy?)|;#NEhS= z<%QdjLU2tNT2f!o7a`mfSOnKJ@sSNnCF#Rk53W-1sPMhbeRw6>#f4`g{+o@=B^yr% zG+%5Lxl<`HxMJJzzg@fW8YfnWm@h|uF>rr$>G5lLB4Y|jvj5M7;up4Z{VCe#LUqlb z4-nsB3{L$ADl8ihDgXQw=JL`Y(LCeiQeK^?Ft3wbi97258`wNaH0R=&FQUm1If*&y zCS=jb;Sv{_N0M^?k5V=slb^y=-YNbSQO1ag*Eo~%1pNHU&ctIl!e9RO@aVJ1#rzxC zCn`@@?~`w}^&0I4RzRb<{SmyEn)#F9qndj-^F?1)LiSvA3bS7{1A ztdm521Xo<4r4yw|3b_;{ASQlmz77%nrPeawI_H9 znBIvt-mgb+%i`)GZj{B_gB3>oztz_KMml@MZpcf;lx0Cs-`_-wfq(_UO>_AJ4axs4 zo>kricPht6H1b=)qhqU}Ewt3~*IwPEW)U&#Z)F-UdOlbnrz|5%w=zj$|KgG0z3_fx z0Rc>FP}=>p)%8@PknHQ(u~7r_Hv#Xt|C?(>FG;v!e&agUt=*aM9?Sfp+XX!Jk?)?X zQUg{&{_x@2pExe9=cUwTU|bR3*~P$(eRn3x{7F()Xav;#{o+obNeoSA*a#OVr$}V} z2V<^z2*e-TI&`b_;X{1*iHv?PBdhWNL^TW$<9-~w*t(_BnG${dz0Iq51DN%!kbQ-D zB`GcwzyC3w!#%0sB6SK`)jTRf+JasG%0l8rluHxXye-Ce3^-PFAXtWD{H{h9NtSE`1k&M(E>PHp1N~2*WhC4bT*MjL3MV}G@CIQuYpdcoWM|q~7`K{GtRoh9K3))3 zzYlJMIolk~zPpnKNB@!F;Ai)xfc8E&x%qvS#`9|Vd4FUsSi1rcB;{Toam||~DIUKi z*y<{lB^hG?>E~LnK%lJo`b5Kj6no^-Rv;5?CLuZ`WR!j`ldH@8wF!d@$3;7?z(T)j zs3s;bN-p{DiNjZ&>o2ZHY60zaRg;3(VBYPnF#Iih*67p6$u(Osd<~BU9N#4o=ue}| zc6dz!Acsmxq<$|e!3>ad?FwI+aL?oVLfj~`%GTu%|8Jp!w*VF})j!nzySKvU%wSU^ z;G)k@=!S0YBFGt}jT!B;%1 zw6fl{^S@P*SpKlV5^z<3^=(0q83J$niGnha^>Bmh{}CsQM;0XR=Xea-c!SQ(l3k z_;|W1k8pd|P^QsLb%cmK$J=kWS$)G8Zd7a`b|t~4Sa*6VdhKwY{-6VX$7V7KOH~$)u)+)!BKfsu%o)d6(R!34_ zT%6dp#aeNuM_g1{LCv~mG~PBR24T<{?r&A}`t{ftN#W3?lJlfH|KQ5hv`Q^>CGv9YOUz^abjXc;}I569vz{wnV>xNxZQ>%i4 zhT}w1m5iXLQ9@4Ibb=%1G`&^D8J#L=zPOUSi8wP)6|)@cS6fHndfN8%%HW=)X*ai6RS)$f%MP&FOnJ@V5OK+aS|*9_8-zyoJHh zgr(Vp&d88whNqfR%!yGBlAM;zK@OvD`CY{eWJ~ob?c3&BN8DXEs{Cg5;NJhIIn}F< 
zjAL|L#17?@hIvOsA!vBEPR3{~7|s-HELQ4!8IYhkp0{?Twb(SVK1%kVn`9Old4yH3 zMAPSQO8E9naOjwsW4}s-N373JhFjk-*>reAvA`Pq2j?$!77zhZ!G4mPIB2#yBD1nd zHMv%Fam3Axx7dnUnELHlnP9Pkiiv#gRL<~(_Ce9Ak&57hd zY^Hjk`*%y;820I~gYAFM+ytx(Cf`tLQLa(QnZ4J`U{TQ9a)%vSynmlg`ux zN0XU`awlF-LuHPv=^szhUy7D+GdTQwXYrn#+3KEVyDQ(2gfQ!>;f{RcQi0#{3UOBh zvMn-Tlldk|-;I7kL$dw!HC zd)Uy?I(i_pKfYi2bb`}FOqPQ8V=~IeE;fJLD1dkE;)hh;ZW#%17%haRX-Eg5<1WqH zYp}q|%%y%vm}T+j1JMJb8U5{?4&N!V)b-R@vuFEavZ#tc7Ob>56SS1UW*tRfurPsJ zBVYd7epS4H3DMi#fuL?@X}~>N8Do{zJtW)Ql6dL&abWfT6(lOB~~A)Scw{K ze#9JjEq5$n*n_)VC^@tsA0RAAv1yHloi3ZVA7hJudxP<3te&vZtB;w6Vt1U?fz>2CI4am6p&$yg^4=1r;`KL zz_zG8udYeG7eQshm;I1Xa1oN3sq#X40MK(k&(%^t{!80em+O6~41T zbkTgKlhauVJ3R+vH*{ZlO7;@8Jfprgz{-0hI4|^5c#YvuqAe=O{`IzY6*|DJJUADa z*uE?iqg_f^IVS2afK3|Pg~uyKOU;~;Us~5H;k2W{B)z2wE6Kqho;iMCBFQ_)xFn_U z8Dban;KEF~v-(ip(qf@PvSvoxd>5ZQRoRCfqS=~_=gk&p>+%$YC&IP?|HB|bZ!h@u zECmxLdX+P}x)=N$&)km|D+K1L5rsvY7~pZa{Ee2mW)%r zis{tGDp7l&jVEK3l8f8}$3sUum_;-C9&r@f+U0#Y^2DP+c*xvs&%paE2a#qVag1|Q z!1>kR+oz)VqTHv+EH4MK_a|^Kta*zQ3B}+Cb?C#E>j?R~(m~Vio}q+kllZGAhzGYK zFwreZA;mevj>BwCCh2NQz-brlGm5Y^ArtSnJZ_l()N#K$OH;Lp@;w zEw2D>JsJ5n=-t&Pq7^+Y%Z1Z>e~fjPbK0HnPAcY5snu{^!15wa+yo&3N6<`(o&?sY zbt2x@vo2@Ino(FktOQ_OlC|!2l)hewSI5(vlR`^N6(+OV7UB=s)L)l(Q+XuZ$=BvE zhUL}$GACukMz_FLz>~v;R(%9p9OAEt_P3%n;zB*n}^wNHrd=1*} z*z;Ai+22TOoW=sDvLm38533?cOK|99s}b?j(g8+-EsAqa605qO(V6j5KXj;qLvOYA z8pwK;I=x)bo(!2A@dv^@3Sxz~z$b4zeok5?dDyAv90TiKj-2sIuTF8@F7r4RQS@|u zfk>L%Km*O(aGtZZacB;|NLqugKUV8-!rnH6hyaPnkrb`aMH`KFOJL3wo0xSUi3aW9 ze>P?}@}4#~v5|Ks+h5SuRDLL>N+yP?xlyfP+G2*XtXmab|HMv2>qaiPnWrmnV(3ebaz1RO~M?e@Rj>V!d}cD9@pm<0dprEBOrGN1%@aU%(e{IEc-^5;*FCs_{U$ zi&2PtCzp-HzQIyr=}WQK>LOKR3}Ddd8R@R=iST9g4ZAdhh}BWA^+e<#d>uxfXt4LP zL|>Yb%9^NEi8=4j?mzP4FzQU!aIQTHD~DAQ3y&y>K*S8DN@2Z;* z^eHg~C|zNcL=k$_?{F3N>BI+y#Y7-n=yJ)|t#1nGRj?#p)0aiu4omOn?r;oeG#GqO z!D;L}?tS`)d0$k7S8APl7UCUe4rzFcFCY3#N{VQqBI=OjZB451Bw;b{{R<)|7|mJC zp9Kcm$b8}+JgBCTHLO)>+Az|IJ%4L)rrSF%y){!pL4s2kidHcc3hdoa?y@2o+KaY% 
zb1hiJhmm60Q;T;P=7Z6@4X6^HiOlwRJ<=a35lL+_&5|d#)&MG}JXCkIKex-zNCL}{ ztR@!T$un1+Z8s)CH7SNYG7Mx0dp@Nh8Hle#$I;FoPdH`1-n&1LMXP3Y33b`nt321& z(eTx6H<^nWB}>iSB8n%41ft@Wl83|Qdk&+b+NqVH?fNo%dtA2z0G(7 z+m}R_#zbRCY8}cL!^bDxq~}h=83yn_bN}X}HQZ>FV3PK5L;R=tmh}8aqu2}U_iy(X{EAyUO$nb8aa8`t-|5M{#H3`B6dXKgPYTMNHkxzg3jl1 z$=eOn278x)rF-F>_5hK=!}gLS>UdiH>jO9Qi6|z1)EN7Itx7(aB#iyysa_>kUOcC6 z$J^wjq!)7-B^}qr=kJXOZFdvR6#_xHnL?-^jI6w5+KozvbZ6C>AxlI zJeh=NFnE`oEL3>HWFB8Lk1z`tPenMIWRHB7KcXBn>e>}1o{n&5dS^C2UoGm?c=fZp zi=by`lM*k=73CyJtP?GCD}$Y!A+ z^aLj=*if&naKzKZx9-FqD=sYJ)40TGrx#Jx{JHT@+w6yP?0OrXs}EqSPkV-DjJQgk zZBl8Qt;Gg*el9P*MW5~$+H4xf-8FdNfs$>|HotR!hcZCj>s`H{QGABnBf;Tp5Eo_$^_jo&b$|I2L|BoA!dB&@5PF1B5uC)i>rquVAX6FZmzG9^!h(X)hcn4nIh zgb$YIKEIp$l?^6TC(%|#!!Vm*srwpx`-%+c0(lWr+BIc3OK(4LBCNG*UuwTnIa(p4 zFnqP{VWoz=_p6u_60C&=0No~+CamQ(3mLr!BpgwF4Q6=@67;@k@VC`fJ+x*c2nz(2 z!&~0lvH!T(fS3&R4u3IXnhT>1cB9xgV&d}Q528mX5HmL&-nUJk3@PAiv8p<`IP%s` zN*{f>|3WOlGc+^01MQ=k#v7BBQU5K0kT0({cwFm(ZKFX%fmr4=2g_C{71PkQOP@n?`yU(|Otm!clV2fZ^~H3f6rSOfvji#H9QBn#O0nTK9=73N&=Q`)X}9>*O%n4C3_;nHYT~r;$&z zX8g`4xragQoCM+|8QrfeoR)xu(izW2_JnqS)Dvpo+s?cnkGcVbQiTY!DhBI-$O%y3 zP)rz9D%Q7Ywd;;Am;lK^hH+|*g3jx^f#Y7*-|x}dI4*>+Wq-R(5&acLU^PaZi%KS` zHPMEA^_qA3Qd^uqPo1?ME9Fbi(wFCOD~@ZOZx2GS(>qOnU!v?C@_Ovv>_w89JzR4WB*jmo1PcANn4~^jq(X{kEfwd@ODfjQ z-lokCj+qEc5>n4pEX<6`s{t_;Uy5JLNVf5quLQ}!+cz|z{^RSi!6R*`jA=%~0>MNV z!_#lxO1i#D0J5^pd4r;~?_|?ZGR^D_54R4w_2ZFrx3$sdi#ybC&K>|zHF1Wt)ya#q z{obEwRATpO=ml6u*V!y(Og0)7>|Ov4=Y(!{%|)s_5EgX1vS0)A(E%x*B*6%&;{*KR zEbjDs2OCg~?!9(Hzy&)o$LPllXe`avS>T z_L5(I$5Zz>qj3vKjtuPvt14-6S-;UagtyadRZyUXuzOr^RY$!;-~q?+ubu4-OSc*T z&PYXu1H3XY}W`ym- zl4WRik(g~4`k8M5uoMou(YKw53hBga;>nzO^P?xP|Gq%|GvDY??nfd`r9mWEL^KgSBA?AdOOXa>14dLMyqh@SzK<7f-*cm@^Y| zI*mLjHZM3>S9rw4djTSYB6AJp99I{C2T;;ijv@MU{ zXLElWU-M0=H*-T-2_MlVA8C#PKP8DOw*`QmMMhtAjKmFtOAaNmFV3f`+doq>*dc~~ zaj~Y5myW#G)yzT54mb^UfkARka^0iWLY*wKEWPq^RRUsO08be#hJiHH{}(8eoL1+h z1rtyI$ciIR*e}zlS$!ovhvt2$?<>0E=0EuZx@;VhNb@|pnj{FcxIlCFRD`? 
z-8KJ_S>zH{U+D79j7~2$n9|tQWwpu!v&wU_fo&a4DScgTZir(*>{87LN@Ugz*mFAz zVn7-i$XL<&<__5N2f9yRABk$|o}yC%%_PMDQtTtOJejMheVY%p)t{$-7{ye6>oa$e zV(zGCv|_A``}I+>Pl&+)yDneG9HWE44t3t8mN_QtH0oZEqIqWO%d}9>63N(ZET8FfagCu(&DhzEAi!t6RFlgOsxNja7{-yx}FB z&NIa)+FR&)we!kmWdsLPu#UYH8q;TIfIm|bXxQ$pX4Bx)QS7`>=X~5-9c_^6Y>Peb z9a;^5q|odxo&=Wv4qC~HlZQ7vpe{H>>8)yk5-*jyZ9xAn3*cD_!uhTPQwQZwM*q~N zkF&47Brfk}Bs}`%Xz{S~(%hv~J4sxCW5{@jZk?R2E6?AD(C~B9=`-5|$*oWv{fLq9 zmTTnzX+Lp6u=kabgFQWfSWj4|?n5P@G?1n6Z znhb4J#u_J#W0J%vjsqOcZLz6g3B&?dP?b0IBvUyYp|9ZJGBiaWm%7~bB^0r#%lzfh zcIq4^a}%osZE8<5-X;EWyD}f8fPoJ!ll!Zlb3E(#`n8?}8Dfq@ldL6{PlL<6pKC5HMoCwMd zo)nzfiJ^62rdy^$7#8%UtO0IT=QE;klvefTnTh(3<22sl$KnB)6!+N8cM7E7H%q3q z#XmFut{i^p!neybkzyed5)iotGRnk_lwqH_fvKtlTT;O z)l^3>?C*VJ`tBnt+n_GFC7Rh?$U9RIr7Sh`=)O$ahc17!Tsdo0xa5!8v)q1C)4UtUE{5+_ds>;HnUqF&gMwS@T8ak*dSW|s#|Rf@QsoYIX%*LpYUx%zpt19 zJI4*WHcYov#98za+wJZJ=-M?#ePa-~TuJ!(b)Ojen*F%RgfnxYrP3j{w33@Y9x;0X zPa){uxG&RFXbRMfQsX9oddFk~)X_M`c&i$40(L9`i1IjC?>QN=)I0v-*_)E*1!rw4 zk1Arj#S(0jhMOjg(-#XtmzjCgV0=h`1+01reu_TXZtqb15y51Gy;g5<`C9FXryqo; znyW6=bKZJKA;8)u^L@PDCp6opD2>&KpJBs3`#-jO*#-c@_s;%j+_TSGc)oiOaGZWH`H{g+#{~9G(;69>#3|a=<$X$Uz1$_IR1|nm$g#e)a-#6atjkfwo5rk zM3MmR;00lf`BCpYV>%@Ii$KTob44@VK^g+RdyE`(Afiemr@7vHcYP(DCqcQT3t4sX0o5?i2#fH)C>C3ZwgjL2thJ!9V zYRQ~@+jR09&nR81O{4kJ!W19-B#0TKzK@Gh2Tgxu=JhjfJdG(3j!&*Kr&2DE|LXbm zKm@Soijq|JS(m=U=!2(DwaPWR`b_YD#L<6ix=f+z8K|4Bwf2<^?kCGEaIQ2>i|Na9k zUUzeVJ9g5xdS3@3>UeFR#BU*_Nv`ycNfRF1?@mr$>a4 zPP0dC2$Vn-TA%mL2)jTCmV;n&WVfN6@mGLEEWX(_Oi&B*@pB3Le}4&@zcB`TK!-zI zvqEiDzOUXbnlAx(EFe(n^OS|BI1w~got8m!sxa}Qi3ZE_R@GO|q7GafkKjDkh{Aro zeXr4B?3rzL2)fzW|C9aYSaW)1UTJnJ1|$^a`loaDQ$rBfVzxKJ zHZDEZG$rcNL0A{wFKuen$OkZu(LTkzOx0W8?{!W#nQ0_SQFRSvkCj<~5-$#q9!UJK z9al_-4)XTNkTxp|2UUlWPa(8%txEu8XDP+@MH9C#z7XQu9$Kg$QD~XoiZ*ih%jC9$ zkZ?n{$WP1;sf_oaVk(=leGkZMG>BgOIJP z;>9*uKS4NLXX6PmIu#vQt@2lQ~YkPpG7sFEjAr}6E zQ5;U9E;p+tCg(tVsOdxA$>4i_1rBNG`HqW1s6@e@ z;5$Y3tz&`E(|Ya)wP$Sw@q}Ij(>EYLN-U8Cr7qIpCc9)1$9R4H0QDJEoJK~&I31%9 
zGwi}PjNdukq)Z}m)xDKK#V#xNF(~A`H3*1_O_HdxbJflMzRirT-M4jCWd8dO?{W}T zbl#@mh@?O2A6jTCtekXdf2Qythz(>=oOUgde8NObzVS+GsA~d!ks&mGEvR8zCGoU# z=+t77ms;!vO=mrMeV=Rfz-pL#29q1jHLI@|B;SNnCw0dKnVz(?_l;u zJU;+}QpQ53<7_YQ`}X!mH<4RCN>!?IzgT0e9@2~Yv3GE9n-KOCIhrjt@hplH15oJU zB9@4kmu=b{r?A?^ViV&{DdQbHY9<2T?@|{rfLiT|n0_lk#2L<<#<04NGF073mE+I7 z>kJNVxup|;;AJWqKjm{i9Yhh~q2limQo6I+_YqS}dwGhP3`T!`*ugnNDV6XHc2O{S zBNmws-#XdEQ;3mgTwJYPFSqHm?3DZeejALzW4Y1hQ)g79yNN>>Xd7lK12|8J9M*<* zB@2u2?>OO`4pOt*I-?JZ5w>B>*|1X~d&oM0ZNEv%CH@}|k4j*#a%6O1NpCtZj{jpV(vz1jfJDh&rIng-`qLb0Fpr!0SJLsI)P$Yss^EVd*)7Lc8 ze!k$Ukn2&@wWl>!aieS+Geth~aiEl@_1T^uTTnQew3t~V++gC4$OKW~O^E}{0ale~ zi@PlcVJK+VP^_*X%zB#q9jM0H$EY*VvI@Psi^!Id`)G^{D!HnOZ?C~E%PmA}kyo%N zEH!uAe2o?-b((8x6PXfM$NlEh7l^Ybz1yg?hoHO zPet{N>>qm)j=}aG{^9Jg&#}YMlt^M7KT8c%y%2J;$@H6q*mg;DAn2R$8cb_4ElE*- z+Hv9wKRx=Aty;gsM2v>;(O00Nd#-aJx!g2k2-Y?sj9H@<*>t+>b9E}j$;Ykubzkq= z5L#eAczr`S zAP*$8HgStKRAJcZwE59u+mtdy-$(9*IGU1q%QF55`4_pq-t=RWZO%&u2IUiV8XBLc ze)ufp)UlPn8N16Y^r|ij)(JEzxn(<%Fs=C(q~e07<6s~##_QR5zuyFc0}{!ucN&+xI5nih?%sn z9BB^K#bMQ`thx^oA`?+q`XIkJy>j6xc4)4X8?N|?h(sAkcuHG*2Xu4BN{Xb_>-w80 zG!E(*!|EpN8`7e%9chtuo~`1jt!;{?2S(Am9H28g0_aZk&v*aPZtVi~E=?4+=`439 z*Y|oYHs$+CkDv#_8U=;$;V`;~PyBl9^y#J2j!N{8Qy zvW)wKNZqOFNMkbcn}MvoGCwfNv`?pgNMo<^0w~B!p1H6W^eeIU#>MEoLzTebf%(l< zTUuXX`$-SBX8jOXf#`M|tOMLL0ILJugZ%Wo;HTMesmT$Dl6SlH8CO5in#=(nDn7I> z-}JTvtjtz1*@l*E>( zo2thd7J9^sxdTU@nqOHG$prlYo-hfy(z@uJ5a)69+3DA?jzPb7LI8!iK%*4;5}>U* zPmZVgXm1NR?nUw)I@*Bar_S^ubm%QhP6(rl{55L`8$G!gi70-ZWrYBdbFKZE%Sj4! 
zEFIVSkv;v1b4X$i%Vh_gC+-S(m|{(kCK7DDB|T#Y(akbHUMr}h@13W_^yj}buTb1$=-fs#KZ7`z@{Vx*+mW<>M?xy)66yU50L}Yae4%#|c8UI`?KWf%p>aPfj zG)eJ{QmzBtX6Pz^<=fdk7dTnF18mjpK$);2Nj1iopSN6StYj24ljO>u7!%letI|F2 zst5oaucEAa-^Jazz$|Yym|DjH@&+3ikuayu_uLyj_5-$}F}ejG6mGExmaXKWizmJy zjt|S%C7Bo&(DLt39C#r@_7IA$ZTd4`5pCY|C)P8pIs2@WCOwvLy*My`uI6qK_Y?vB zBW!{LiRWfuG8$c%f-p&KdZs5j*U%|EY$uxm%xX~~bC4>N;W_jj!}OsjFZ*JZ*2|0) zpQ79|sX&|IV0GaU3Qy?-YaBB1uvG^ALsF$`2=;BfbY7gH7TIDWAWXY8sg%4N1Z&ZE zqd=eE5-Tcvf6&#_mv~SltAKtDNi0Qj?u+jk<&LH*Ih~AAmc3!#^h(CB>c$bi+HX8m^t(c+ zE8|l*3&T)Uj#ih8m;7t0^j{$k@cM~3H3EyLjO&?#=$Q@(zY<g5y_mvOW8}wU(mJOEFPwxpqSvp;!H@^_b>r3$HMUcrWMrtwy zMi-hh5N6$+`Re9$H69?X7NDG=KyD2RDaSCn`P@K>Nin z#Oalwy*X)A8%th$YGeQr8w$AuPud-@A9zzUi+$@sKR5|uNNivQ7U6vo=7_~0gH)dc z*09p_GK%UFY&euBmqv-9)a*RkA$PHFM@6cz1275m0Qtq-5|imIcPdmzq1JvJt{nuD zvd-w3cvfI6Y>Pk9YN#>}Ie>V917;b8XzV;~xyJktTE;gJ3^2KDH0SPKpVsN6;srz^ zP8H-4j3)fRTNlHs>%>mFI@q{&X0-j{ewYELwH*C!g-&CzYsdD!xCtx=V~%YYU#4;Q zKszp|S(t|v1kY?=@g#cNW^?%@)ie}(dlM+X#(XSMN)~7%QRIv`jfd<(tyu-=P+UiBdiSM?A&>kFf0;c46?+MP%r1+6@eHZ7FM42$Q zCGWS1SvV{K41*nj!sjMtJlaJHZr;x>M_=v+Eqkht96XV)zlVMd3j7X|&%EdygDZ^! 
zmovS0Zt3V0^fycvHH;R6F!O2>)Pv%7w)LY zCc~JxXp$hrEaF{z%dnGe&AArnZy6OCXF?Tk^g0gmB-lx(<{~&4Uv;PrqZMnY8$pGKT=5zf<4S}^mhK@Nx6OU3DMZjBR{)F%5;_aN0#r`M0K@D{XzWLPaJ2y}oAg!uA(9d-9_ zLq|bU!P?raLy>uqBgeT`xAEO|WqR*GOXX$Q=Q;|y!C%4LHcoN2rx^tJS@RHU8dtMj za?=sfa!2@o>~Kia)wo#x5Vafj&TBY+)(Pqz@~Z*o1_>i3-OKrzgn2LQs-LBp7k*!` zu<&W+017gXOx3Q_lTmn%(e6~C5ef^FLP<0x8~EfR5ErS_qkCS#lt@w7yz51LK#E$D z3bXR^7y-;aGBV7%Cla>kV^Hwa&~E>GIRAkhCK-+F`kLB(DG?kOxYYofy^3g5j4w^_ zPQ!9%sN$D#2j7CaXB=&Pj_7{L9qw^O<8wlTe>+4B4hbCuk!$$5j2VHF<>ZWld>+)~ z)by3pnV@&_!4h!lMulWIm1tr9L$n@lJ%A61dhLd!lW> z0sXANsgU?{T%T|(1iiahp}Lq^(a{%f9v5iUQWR@qz>oz4nnZ!$Y-mLig3|$g(ETws z#3YT>a*sD*oXHRwCIFLPD!4YK4oaJxFcf)fzbE<;Au9tYuKn`dB&7#b`9{@!xSGs| zLy166Y>t-zNy$|OSVNejH@_MAsHkkep$cW5zz@nxixh23i>OQXS&pX&~>f;7Q`An^%K6oFH3z)O3^ zqL+EuO}x$fn(>=YMZohZ^MalNSe*9;`DpwA0Kj!1Rsx)mZ3D0Y#n3P+sG46Ox7e*31`dwd z?|xOb^yIgR<(fk;w@dKNoJex|!dCy#&UL8=ltt%!k=RKvn!dZ&buJ;#W(9 z47?>R@)kaZ1#R-@kOw!?Y~9QS%`dQTVrKgx2jUM$4*la9?6brzz`cVr1qZK{+64_7wv zk>3MFE32R*0rGbz(Q`J#wQ++NBh;e}9LF4n^t-!X?aRtyFxP2!isFB?B6ZATU97{eQ*Y_%U!%(5bF6up=@pO zU>y^*K51l(<&nGTEsh0)bf7ZZAf89KsKE$DAx3#upd>8~5PMHXJ zMbvuB4|hhkicRxRqml4_xLQLK0lHa^Qf)Y`h+H5!e}0|V5mF`}0RcR!vKz~s%9kR$ z#{3uk1%lrTXKM(Txz^oa{p*jRm@kjGq6~$hOSADlyrce?sTY)Yz(v>Vi*6V*YOYqI zS9ExzWr4d~d6)oA&y$V}SYM!{TQ{7BF3?ePaen5Y##?306ql(#3n(%H&RO9Q4cym7 zyM^+;4TFzaQ`01F(2ZvOBt~7MBYG8b4&Fg}YFz{eZjG~Mls-re;Lv=0y9n(`@F6_o z@M5>Bbo52y6{x}vGU{c1rc>P_;amS!-4#5#NmR%*i!3K|4A4%zsT&y_q zf$I~y!e6K0jLT>$yLWeild@f<{&iXU)yuwWixRSlobnA4+oj69ePsI~E3mGW#eYB? 
z0=LEgh5Ha}MuPSds$+kjVc89i=w*b#4!A8_R;L6lPpqMs#2_m6EJ01o&nMUC`kgpo zGPg?Ek2aHOJ00LD;hh_MQFm7Zce1cp{H~nU?c&yjG1!^7b^WsB$V;w`s;1g3 zjbB4ftY7f|j^1+3Y)73U%s0i0fqfzjjj!#pxsf;oP5ujT9S zUjt5gr|`b22Hmu2rAaD#kwO;H*uQQ$1`_$wq;7xuI_ zNhZX`;VC)CdI@QQ*c61ptz z`-;epzi=D#2%31~P!h}{O@#n)2#FtKA&*4l{#2jzbyrpl6E_kBS2!EHEdEkifFK6U z>2T@}-dg?N2fUSE-n|nXE;g)kaSjZ0BOx+bO*K}G9-*yUVHc#}fs;UC&ZWJ(x7={J z{0vMU#q=MUtqeS~QJ5CaJ5WAz@mg;LR$T(&ishU!22Wv;_=d`!mFKUxNV?TXgX&^xNSLFBJ+&`O(4gb!C-yc#=EL+!H4*W z5E&6D?20k_)u>Rn3&-q+PEc&f7mzt5=YJnH=7&Np^((s#p420Y0Y~%rjO*uT#`eqb zy~saAuD`yW`By~6^upPsYLyfT1-t)^C^&tllLI=pIrIA|e5H|#o48hD0}t?S5O2%r zwu6g4ya~9_AeB!L2H?!YFEjw3QQ#*htoavBCem2^gYh@tkOQuaKv3CCL>xz2LOARO znyBAc5(L*xpbJ|}-39%cW_2@?OVc~vaASTI-Ffh7kK?&oD-12NzyGmcJT{joe} zz9v$}lV582ExS_Qq#Ob3rIR9xBe>@;9Y_j1{C<#8hip8-#cSImB>Hx7#j$2x@|&(j z&jM>%drd(VHRK6;{lRB$HqBq7f%%lMNr1>I1`s@7njQ+|HL&uaA>21cg@Bvo4^p# z>|a=w_piVdD?TfK)qIfACLxnQz6Fy@UWX)pX0CKquRp3HZggvTXMJpkPX0+F^^XHG zrMhJRs`jv9OocO3A=f&xbIn4PI|MjV`cdWHI;h01-g)Km79 zM$9_GNvSWu@StTO`pdw))ox#Cx{#XDIpsY4)*LaTUuuuhF&)=D^QT=blZppMCfZtl zAj!r!?a{9tQ*CTwJ;5=?eTuCrx|LmT`B!i?a^gfMeybV!aBQ{jQ+Ot^M+>9-$W&3) z{L|ey)5%+kZpH!6WdLY=DA;vnQ+Q?je*>ZhSHpO*X8bHNfQ0N6HBO+FdgVEQK)cBp z^Oykvbo2VT4&)72$)Ej=#x1abRf(oQQ2^@>4nv{?iz_1Kk^?(0r$L5gCOyT~F9mkX z{bp1T-k(O^11X5IvDlfNJpnF$bBe+8!n*XaAs`X~j(V0Ba-vOQ_uv34sr*?bv zG5UtaM_x(GY(h2>Z(Z~1guB4~yJkKNPp7w3JAha~{ocY09_4$4+8=9BF8kj!m9tNT{9)yg@DYw%$S~59z93%}$ zd*c7A;cTEo)nPzR0gt#ln=oC(T~^Y;?t$F`EKR9fqSsHE{awZnSO}!yn^=`Lc>XDB zeLfbz4}bDy40*ZqynfyIC=Dm2AdI$)Uu?PtUJpR|0-5DzdT{DX?O`ga4frmdG7;t% zso?boVCw=TSD+Fvus;v%KAB?Jett~lxDg(*4R$6;AsEhuTM6L;a=@vQIu(snD2t7x zOnr`E@ez*hDd}g!@B+JHy6|}_p(O)8AP`h~YkIOM^*H0hFs$T zF(Uaff`Oud$ZL|1*N7f!Ai<7FhfjqB62Cse0Q(=2-3SnI#N=7FmnT%_wjtq%$EyF3t2pA}=%$0?T%(MOXfkJryF z&paIb2PombDhL;!4{v%8gBr~BT{~BCU>q}km5vh;2ar+fj;b>6V%?a9P=>oG{7w>f z`ka9g?nAShWzot1m-jq?8MQ$7%hLMNy$qo@A!O|D(H9`oJB5 zK7F2v{l)Jx(?lwrGzO;*JnQ&&jqZzMK7a8W`%o?0LvCtKgBP1h-!$E1xT$Vwe4w{p 
z|CaGh{vPFYeF3RFHSV3;bajz~=tN+hS*)R%A^0rX5qO0-b9m~i4TUVCo^UP5*sXMmWrF@3< zgH=kEz=0i;nE2t%@pD->Tg>tX@)k~Zr@jnvpzY5yX}|SG8uw`AnB$?g7$!^36W!S_ z=aPilx^j){VIAsR?vCk2(GP7)mhol$_9xq}FlUv3cNQ>$2Y->`JzcyS-cde7=03G< zRIc+5c+4WJc|IQiKv#D8l<>0^zhyqT$GRB0SY!f;#k0N?)3#3nd#=D_lABSke4;C> zM!QVIT`4^9_UmQ9YT@{EOPjK(s+uAL=|IVwReO9UV04tflbp3v_4C8dcCYeE_eZSbZG=l5+}TX;b5`Grdg|)3uiWvD$#a=d@Q~U?P27 z^Wv!yUd@kRa2HvbG5l1EwfCBo^ja?R%}!#(^8_ZwotrFX(>hC(p1jDxz2c&$+c7wT zd6t)T7*rWyfh%J}Wh*>w2jz2({dG}SSbsEH3{?zBTK=b>S67=cDLL1V9NK(E;IMS! zO)9#sM$Ro3d5JAuji%4k=JH*JHiN>Q@3( zdvQjEJ(1Ch5nH{-uicE+Q4b-$+7BCMo1t+dpO1r)2( zwDGt9H5uURxHdEtK=-J7Hh=lW1S`H0x=?&C+6q}#8VYL9y8IQ8SiT8Pp8v0*0NS(? z@@s#ZE=ac8Pyj(FHli^?^8WwIaLyDyOb~W5p714I_jAo~(%97ofR%uVP}6eYMMl?` zU$+73a3236PUR$MaFyOJ!;Q0~DmD|z6DC&clBr1e<{-40#@7`s9^88D?-HU3n@KOx zS)%`iiqo9p|K*VTy{i~zxpvwDgZCR zD>MK&cE>fC9IOa%pl6+Q{GXVw2orMQh=-73o(b!bbwu}xcIo^*73t8x3ZBcO*aSrg zbA9K|e|Z5y06FE2QB}QNtoV)_y!`#KXUyjkj6jRbR$9ICf08}?>kAMxpTh{WvbHZG z&2U|-6QBq|1RCUdXLWYqDic>Ss&y59$bkFO$n4c0Eq_?GXe-}?3O|TO*}@KB7O5-N zlk>Np0Y;;sudSK#oyuCjC+7R^ogmR#_c(%egYW4m0afn22}r?0n683j{cnHkhSt4M z1F#N0gEHBr{`Z^)8SoA8DC+$zxPtM*@nGw9%K~DzAts0S1npmq&~vT{$4yk^MlFt|MG$sx&2^MsSO^) zhyV66mKD>mEF2Y}D@9d{mJo6NJIb#H`vLZdc>=x!D-!3#O@Z-!Dj8t-La~|Cn7;)$ zP3Q%N-pO`?BVoOR5;ep1$-Sg5#FvPgw|c%_%&q0+TB^myY%7Fp>NSGI1Xq0qy!F=c$!rlLM12q&i-D zSYFE7rR%HOuA`!(RB$QFqG(soGZ(Ey!Dl(qpE6<kiXkSfy+coD>KTuEI$lVao78 zSDY7|XznXQaM1Aug&lq{+(07jI(9%zz7v{``8!EWeunmUW0GQ z=o9j?Y=8$EV7DFa{Ttx0bzY|FTQGudj>E$#H!TAU)hX5kXHiE#^X0xN2^7xCRCw4A zCK_BB%>;W;n1u5WtW9jYeD%CkHQ4VOpKT%r8?Y_Z6N@*{0Asr4>uzw`Hxx_UA1Kez zZ34sMV-{xM6f)G==8K*1fJkb8bX0?yD}ZA%_ii}C*e#@R9NO@24zKAgqB1jMiwjQ?2^LpZN)Hn1jmt9vHG9dFSi|JRI#K>TvVGwwm~p*AvThsHv=TfB^T0UWu>BFfO|59-W^N zdye;$5$E%dZ{d0B&M!yZ*i}qY<*gZx)LCcbmRYM033lY3|7@A34~~{{>b*3-KX)Ou zjeC|4ImH*@-1Ih&f1#_k-Vqp<7Cw34u0u#HlA6J3mgtUj_;Q3!Fe*hM1H7goW`AV{ z%l~*f>#(NRKklDnd+e50DM3OSM(I&X7^nzHi{waU8!&2eY`PIfsFd_!lp|FX7-O5{ zB&V>Tk&PPp+~4y&zvmDB(CgaZ+V1b?{?z;R&RYD>q66aQqaU 
z0gm5>@cYZ55++0$ERGFL*n+5U|7l_Xd*wf@x=ov|^Il)T9LED4YbyB0D;gn+y8@|} z&vv@9a^wwU3}HzD-*w`i=iaSigW}eU;EM?@{TbhQN&=kaOYveK&oxczF(G|038?d| zMz7u)4@|W7zj4{ft1vs*Gv)9$z?!^YZwv$aPzMKY1}%Bh zmv|Bc2rca&u`e=yh6P7=s60wMz)4S-sj{nzag?EX1LQy$S}4&gB%SoHhJs1)>r8(=X7@5cZKZ zyhEq#aLn-Apv(q@ZfOl&?32W)tw(|}eie)9CmzY+_yXI1c70T{DM4C`ri_^(8eGF(S~`OFu*=9(N(4_+%YO3ZIMPoJgcJ-d+NpG z!(`|;{By`#WZxoFJF|HeRoe7|bM0Wi?AkIRO+(N%&PNJjpaoq?Usbn}Ot~%YUh+J8P+x zaCyMWWAojaEX)n*c!w$uk52lX^nH-D-VcAExC@dc8*j&7(EToc5u=EXmPz_rzI2-n z^Sqh2>=j%}#moUsC%|ca9?9I#HMq#jP2N`p=fC?W8-H>1_ZK;et5V%5^)zy3lS-i8 z+me(`z`rbmH5gX-6HIZpnBe0QS@*>gCB%);M)5hf!cxV*8-Cg%>q|G~u)t*$Q|1&5 zQc8G5T@6T89t`dA-@Oh%g~mijYG6oymKYWlGCD0_`RC(PZ!i&%(7Q9naF=~Uie1QD zO}+r}*eV^1Z78|t`)bdBb`olT(}ChJMXVWpWhqLo{=+n1rR{QmW%iF2fQ-0Ih|ua^ z281=8`!^lQQMp%6Z^=aiqCRieX>hQ~*KkX4s$?KM^l+t-U~ zuMqD-FG8PqldYafDrg_H89Mh!&Wm0=ES^QGmo09-ezrqohKV>ky50I>WH)ib4gxZb z*Zak<#&3Iu%vTBlj`O!I@)TXJG7yVgA1$l=zcR()j{#6J^W8ZDV$Ct$yDDOrM@@Ty zQnWH4QVN=`*c?Pnw3!M&NnOneWJDLl}=pq zj9wGENJ*Q=azm+&TLbu9{hb(Cu+;6|ddP4mLtmow`sogLPhrvi7+sjh=^dbi{NaH| z#Ql}Wj&D7xIolm*zO%Cz@c8~{FwH5j}D zjfS~RTC8FIzMJivEL;*?6U$dZi!8JN-z|da;Nd#&CwKGV-$y`|(*Hp8FKFsKv6o{- zhahQdM0)|p;Hmuo+-i=W7mRT7L#QRP9|i%47{m38Q*oStfOQ0eaN=Oa3Bv^qz_&vZ z$(_-w5AOpONXQ)@pJ^>-vU&YUW8t{fwRYWR$cN@y;jU4bWI(+G^I-HjLo(@vE;eC?2%!%rYHy#kKeWVI>- zgiCqA+cpCnDuV)alclOk1oXT>WJuTi6iaEyIc&Ke1ZGPbhamD1au1 zxL*$r^TAYaY`{38W$9503XA+$yLwgc3dZ+MZSDPo$It&20pUEfe0CBGddVFPDy|z0BQlnHd(_8-VR&V|K?o`K-awx}SKdxwApVoG4e_Q?&fYFVt zGMX8Q#;K5O7y7QjW(@?bCCa$*?Xo{*D8C=Pnbp)XngZx*7J#iQi7FdKmt2nHo7Fdg zC?5^r890Cv`)N0DBJGRz@i%LB16e7sw3+~voY$PQ@7CVLueE>(NHKhinVkUnk(%gC zZ4dDvWNX3%jyHZh?O7nwwKg()k!0KY)tJztQF*7r_+!f;=rmEGj!t@F9T;`qa{7*r zfIM!PAYje~H|1UlB^o>2&E6Pc$7VC2CFd7^zqKVIKf}NW&+S2}17uSMWd@~RfxS0h zt=&nD{seI=;kFA}yvJ^C?`L;v#=u`Aq@rl#5209zGn0EEBnq&rk&FRv2*`jzKzgrL zHdBS?KY-GB2CP0rfQaOFX(792u{Cr7$iCa@3)Wb@Kv)TB;O1Gp6FFr8m?@oLKc+pOj);>|J-X!eZvTuq{cl}pqq%#iyn;(CE23F#dsbTX-=45W1!|wy1 z!GOlQ8R3YN+4zcZmQG6VoxN_oVbPLFQOY%RX_AUbmAi~D72K+SxWogv%hMElne#lpc 
zG>QBU7;JVEH`A>5K@B+|OVMYDoNC`Bq6_ZeKs z+1NTxJ_o!9{&P|g$#{PxOeQ|WbKY;IeP3s-N1gb*U+tCtWq;|+XW$t$dTZ_Cwg()f zmK3PsMsXsvdV1_o`G>ke#Pe|Jxz@~z8i9yP{(zHJJ-YodKh*ap9kAE)&7aDMtmf=& zvh#6qC0?3O;Dfe2?0UhO=X$Bnt(2olQc)2X(bDs zccJ(2X8wZ}AbaQje0t1M_j2J8jpiZyauL$jVjNcHwP*oIZ+4P#=Sh7uVl6QMsw_+9 z)}dq$g_kWDd2r_Vn|%?!Is03H*NC$B^7C^JKOU{SNCr%BWpjo#%8}X$UI~yh)`V9i zv067r6u45mhf*G6bkl?DD@_j9W9FyzaYJRI+CsIq0n$v!+N<17(j|wSi%j%RcETL*&T#HaqAgv~>xo>O%U{iKq;Fp3cS94ExtVRHX&B|(} zMgTgL!z?|ldGHkLpXp*tW>gNmwLT)b{19upfYVq3-IDuV@vCORJxTnvPMcrtuxvYl z_G;k7;x=I|*Fqek)0KIcR+7P|{qAYj3u9yJ@P3eQlDD4(z%w zN{BpWwyf@H>6bf2nD5Y#T@A0V^x-daUaBsYZ=W>$+b1?Rb4EXHG29lK6|Fvj&LUQ+6wFMwfC(!nSpRqo+E;Hld1bwB%8w zN)}M>nU^}T&QZ5YGB^CQ(k`yb`)|=>QuD{I^RlyoxJ{xJqk<#!%?3B!*+X;TSmM_4 zRkfYwQI21@ZAViCsKT2TLidwVH~v2`3BHH(`SGVe_a{SyoILmEn-)Kffprn5GA^u? z@q)hg&0kbSS6>8Nd*ZKkq#A%TQ(7ni^ZAg&ZN+WKj0|cLostO13|krOkJ}IRE^0>p zYubQOzGuhAVT}IjjKgYuqAKf)M`(zC8+Ugv)JeLlLyD=8oCEew=L-N$sp-}Oerr{Q zxhkrTILQZ82Cg88myJOtaOL5D2ui7OGLkIAgK|Ls=e1R*A1JZKm)kF37c~RtNBjW< zQ)a2-W7qoi@)Y7FIjkp4P|^7cXCYZ5|sVC^}dHF`s0s z{VY?I*VuVUrqWP*tOmFbrOs9%utQ^SgDe*QBj5``mW(A|KUZIDw~CDc?r3izZw7In z*F^m|;T?pn4i+!ZP`Oc%9~zsarC-)vuPFOi1eldtgh9op@$?9Y;l?X*hcPWoUE!8g z=N%B}g3sa}b&$7ay+Fb2vz(qR@DPNb!B~VBs|k>>2UxN1%XN!fl&`C3(an*Cr98 zK{;MeO{a;=pP;@SWDsYSV?~mwG9f;0`jtyNML7E(jME>UK+Vwdzd}2zfepaNy16F3 zN%Kk$EoUfc(-e3zlsG3$j@Jh}5mnn}TrZbzEDC9*HvGfXp1?vB+EpnIwrK>iMuMUK z-K+rAcPyECp5W>~+63eSG0_1_88|HD?Yexq$D5v+L<3oA;<~?1XN_$EJK%(amqxpoAVDq>wa(DVBNYcSF4mG{%k4etKq2uBB{>?sJq zL{zyRTan;8j3K*!hG3T`ZzLM}cd@w_+8@@!y~h(~(yM=oitr2}o0*dK#Ts*Lt|G6L ztU>|!XB+zp=K7`KE89GojvCJa9D?9LV&7}7`}Y1$SV4_s{Fd0pea%*2RY8GhXV0ax zr<{G>?Tf*EpV`uA4{WYuS4p$~6~tWJE&^;A&up=?D%^+$b9qDT!ur25IO zjp6iqlRR~76O%I+O3w z0X2coD^>Ix#>UAg1$V6WEt2v^?#1RVF2{&akDb5BhOu~adi_L<3Gj-8op z2?x}PbmRg#AHYZ>4sx%1(!2(8&mUv5D4~6YgoV6N~K<*~t)SM|gF* z6B2lsoQunBmxN>Az9t;&NKk}KC`+QK35$(x>6st=E8hrQ6xHq&XKntQ9QE1dSD{^J zoUlJqooGbBtPatd10t^b4z-CnI-D{KKQGi|H&z(Kdn|n})7mUoQHmyi!_*YeSRIMZ 
z#rkzp!xz<%oEwwr?%CGl&h1o@jl|>edq^V}%G{zi++9NcFQH8-(I@SaQ9#E^$FZc! zIQN1$VSE@j*H5m;U$M*aZCti)|0FC*4ASr;|D@-R<&~wUiClY_EbnQA>Il~<{_&>nwbVJ1b-+?| zwUXm)?sIirkGL+D&Eb|UTOvGPOIR`IPx3UCt*68GpNr?Zmsi?^QItz0d%3*C<@{{BT;+8; zBf`{q&R`2<>g|x(P>{QoOG^-#;f0E5myNFSE~=@SCH@hQ!xH2=@K9}S-2Ah!a7gU- z4h_3ymBL8DWF4L&%j%wo9A(AT4)~8r%WjiuEW_`a+HzhhYDm^zb|BwM&n)q8E|&m? zS(zlHHcp@ns`2gNM#!C5UN?ymrUiVZR2hqWgsIep^E~E<8R!4?0uB>Zy;hYjs?~s` zXQzs>u0>NHT=g$YK`Zk=OrI%v<08Nac~I_L4vgjXzrklJuf;~BYYo^uSF_(h>YU=_ z-dWkmZ1vw{g2a46p%+hOH9QR76sEHTNj$k^XV0a^KG2;WP6-_w0gSKy(ojiqoE8X> zeq!wpRGde2d7ISA^aUpkeSMvUwlla#%5)M(dm5UGl9uk= z%sK!yxapQI_9oOIh=R;E_m`JnjrGS)LXE-nE-XR92UV7`pv1=!x=cgxVv}Y*9>(Ns z?Bg}3&@U)rm+Fzn#IWi~UL;UP1z%~u`=V5Sg23vkc{HRGJgC3W+Z?(8#~X)VA%GsP*Eo$X!NHNc;6mJUO3}M20z{e!kC` zH@k~BCou%|u9!}JwQ_Sgk=q!bK##stgB|z@te{Bx*sO{OBRwr!#Xnbb0e6IV9CDp3 z5fMUuPCZoV=GtyXeSKjc@10bI0-s~SU{EPH_kQ@lo|bL}h3xhSJ9RZL1JhfT#)pB2 zl5KIzdup{C6TIZe;^t&VlCN)Mn@ecA+e9UWJeT$M!PmS%6P9q+&1k9+%dPnt$2j#g@Thie~XV^0EoQpqH zY{_k>lJ?FR?wVP{@a}|cR$ERC=46mtIGtCF6Iq;Gn<_=12W<(bZ;V;_XBKAbGbN^% z<#e0b`r)Z?stRxq61WxqM^{kEyTNrES*6|F(!@{Cc4@>)$n-fyBv3yjeA~vZ%OyV1}<>{eRh1=+h#t3!$oHtqfU$Ys5ZYk4ELpI*Ok`g$bcQjgf@3nwqBoZkJa z=h@AhH{&lIH_U65jC_0PLMYGg2QF+iYcMN4%cqLE5XhdN@6vFmVBE$ulz?E-s7YmW zjOh;B3pbWmv+4bfu4@?exw?y8{d5J^c!N!?# zy<{hp5W-Gy%YHQkMqz46nvqQ-kyFNw!B@Y0%e(4Ix(f=Wd%owBWph;JX$Ol=nYWnz zg*z(fdgM~8`ksZa6Th1sX+YIY86r5HX9y3vh}pfzPiEEUHL^!-Xb*qFS+>{YI6$`9Be2&3gt&^nKG-xmyRWCZP zYGnI^xQn!HThckIxWX=OG|9*bEjXHeS|X;RC>PlX18yh*S{GEMd{db}QYY;7T@0Ol zS#TUD-U6n$)LRCw{#=T~`C_eW3-`7(rq8Qp`O z0}1+%yv7;DtnrG^$ z%@GONIejfo!P0+MpZJa`kI7I!bfV+H-l^A)?ygfY!?oik>zFglHS$as^;9d|l*9RY z&lGkl5Y^cTGn0*OBOYOFmWMo*NwE<;Sx8P6fcuP#n@aymOz_8hO!Y$74z2S>XS}3| z=U(AM$*1CL)y|$$Q?&0B1T(o`^e8+VrUui*@~j zH@v-y-C6zoK64|ls^_Oc`wo1%I?*B@X)o8oQGO6kRm?Xn9k$k(Ay$gD^9s}gPt}nS zC(u@DyfWNCHO19p%7rmz?NE0o#h1nY9M`VJQsBXQlUs-=vGl%2)~kXlS&MfKYRR`@ zldfN97h7tgdxliW_#llPDIC4O(at>ogw~N8_W9}ia!__3Jl{|;Hn16B>xy=3`G7R- z{Z^m%IQA2MXMjQ^YS2?Fb#tm#-00H|N(}Xs35%+}x|gQz1JTmeVe=G6o^7YcjEbhl 
zg2@9HQ%+`ePXfXvV4`9wGspQ>?<+oQxn;fHAoLe^x>lc6`1OB!$!W3~7I<#%&()4G|xFb;VyJwTNgqx@T? z*)R(dj(_KQ zn(IrO^6I_ov%pXH`x|+}Bn-TFxxn1Ce=Ew#23qCVUm-5M?aSeT-fg4N@8>V!0&Z4U z!k~NEB;=}iI~z8GqY;dTiic+k8#=Sybx&15iJbZY%re&TaD z``RLw&%-M9sA0QnYy!*ep^V{$n0AE8{qNI}sDCxKmU`>Te7(F%I!aE~k~4pc^5D(w zPhA|2W=wEP!WV{WN;+=0TaPh`_j_=(>cE9DjJURWePxbR#9lqh%(eRa7Q+-|E86|z z&{8BdNxrJ_?+c>M%;oE?DEF-{x+Sb7W-Q+HM$jmV8~I)W`}ADHk8hFD@we5so?RKH z`S1GS(!^Kxla1YGCymrf4PO$i^TugR#NqEq&y2n5Joq5{crosI<5-37{a*)4@z-E) z$f8)n=X4Zzh$Yb;{k2GwiGG#zg{2ZzHRd7&*UiEskE)b$y-xQ2C1#4n zBa@AC&4ov#*qZivDkh@2<5n@(XtkTI)wpn0#K%Q-A74~Vp~8AdPq;dN`P6FTxHPS7 zDyUC+Ugnq5&_mz*EN?}7wcb4fyk6Y+V(6X8yth@i9Ccgyjac+y%+y;Tt?^73vckcVQ z3xrw4GW)~!^7Ax4C9=u4}va>(?#J2u<5sB0*<4)SDy=%3#;d`SF7*t$eeN{Wk;^}q%|vRj~l+S9T`6>+CtgRz!5c6v$jfx zcetL_PLYVobgROw5<8}JYyXjIbw0lNAl-b~B(%xAXNw!mDdR(3dy?0S#5 z$gA2m-CvLDEV(6_Tg=#ZEf#6vbXEm>2Tgn5sZ^h>DrBQ5jiep`}2M(z3@TiWG0x_+Og{!%psn^@FQ;tzIU(4(@nLeLJ(Jc~iw=pte z2#P92qKWTqsvt{*fxp!FFUOEEN2Z{I#eQK1+X+Y*2;#?RPqu85TD);t-QB(OcpuhU zTrAf88AI#1YQ`+EX_)WMQNrqifutsqf8yX*>0=$N0_qwbvm@+NHR)*&7nz;$Y?^r! 
zTklP^SFXHKQzmd0Rj26lc*unaBV184|4V3n1(*C`|EM?Y=#vW}p%f zPE|ms*6p4*S)A24wP2nj^mRw*&?HcDd|=dr@qYjO-IBDsz9Z3U$KCgQjNWG<|NQ;s z5WVktRz@lLny?-N*x{7t*55Ewf7IUMb$Y0lB3cbH7fJmQ@wusC>g7Pl-O>bHw=xD zQ>dRe%91i@Wrp-hvAd>b75Ept95;q0sn0{roX4Ope%Q=(ebuo1R*(FgaRW(SgKqee zCf8ez`3Ok9Np*cA{vM6le*Wqhp$(leIz`$}!-6U)lhjqCw@( zu3yO90+|FJ7P}y$^^5`KbDvBr<>hx$lBHRFK%d% z_i$tzFhgAL*7Bad^1ohywLQ)2T>baw`bi6n5d9S@#!d3IiXj7X&-0HNy>P4S?Xl3G zyS<*D`f$0=l{9*&&vkjT-*q{&{&2orNd=*_zBK&k^VwT($(5J0*cqie{IrkyNk5rPVfVX3RH09%!T4X&sT;FRUDQ{X|dd zodD`u>BDt3gFmy_tXEg22KR(7p2I$_-DA@hPWE!B5_NNMCOEr&i`F`Fb?E+YkbX9J zC<=l)AttqH_Dt-V5{LC!38vjmH~B{qhh!eAWR>O4-5$*4GhEogp#JDcT7WiGu4pEq^XzFi=rtN*#w=nGpIOK7c8=JZhdD9i+yu zUeWj_us!h?ylM-q(xCdY!Yejz+7w<%IUf$Qd)6y(ItSKB1JS`4gy^n0wfDnoO( zqb&iP^!@s*K_2q1M6bl{)8Fzg#H6`?O@+JyVM?)=jtniG5y@5CSE6sr2gfdLfVE)N^{S7+$EbO3ReGo6wU2{6UK(R(IpIIdvC0xt zgD=Ph5{w^rM>$j@s*8R>p6=%$BZoy`i}hC7Ti=ymWHDd)XSC8w(R_QbE}pHB&zXEF z=-<)27Y?I}_pBZH(49n*$x$8!7HFfDK_`TaU$lQfHJY@47D|8fSpP6>&hK&CrKmCU z2-W3bu$+-I#BX87mnt6b{CIHFh}%Z>IjLGv9sKQzLqY{I3(Q|v&rD>!?hVSqv61$V zW4@lV5A5%_TX=m)RnC6);kg-Q5hI!0PYjT4!v29KD5G_eTVPH}U)2leE8FS)kWo~! 
zT*uqUshyvWRqTC8Y2F0G-@Catdv0$TV3!QOf%Rdq6cmRssq~_i`z%6hG-rJHC+oz= z4f3yTw{@N^|3V+S>iGZ8&>(vMdl<&hptbL5t`o8qwf2ZSxmn%GEr^%&;fqUE| z6L=B`nuEcpyi_(Px$rbp`b3SG2udUdboZfK={gN3J`CyhxmIliY^~Z!8cgG1x0p_# z9F>E!i%F4x+Yg{BtYdy_8mMGvR}`?{`;?Q1Yn|cLFtv+43Cx7JP)74tMa6qV2!FXc zkQrZMFJL+(*yxR2aNga1IlMGmB6pQka5{frdE-Th?W) zO2dp>ro{+Te{uTSvb94p^h?g@h`Jkv-gv6TzDK`Zk8%r zVKxYcobQ-abh|GZ!MAq^l`Wv1-Su(ZryD9_{Wk2`27?_)KH0#f^S33lYMt5~vsB`& zY-)(Z?`O(YF`43~(}@L(y?^Qo7311)r?;Vvy~iA)q}g9RgZMz;2er}D>2ZTQdA`|i z?-@r$&eEf9C~vt$+wGo>aZ8v2OYY;3y8EcnhFLR-zmiQqKsK|5ZC)oY2;I@Iu+R89 zcgef|$ZT=762~DnVzr*u6bX5zeKV5Gy2n^cOu#7K@@H9nQ^5vAMAurRc1mxSi93?U za*|%wn)B-qw`GYtbm>pHD-U$j(ar3F$Nb?l&xBs^2I`tsHissw$na)NV;km#(Ba)& zu;x!VKlk$o<8$=ag@^oD`+LG`CSj>X2!S@+F;42k`-My;zwjSJ(<>?UMd#`-2jY~6 zqVI)GzI%B#uyH~t>LrlLd;|KC*-N=c3X+1#b0HDO8N{kbQd&%$(x2DtRsVEv)0@Tz zKbgjF^gW%vnp|J1Qvj0x{`!~R>qA}1bqIsJ?w7kxwosB)|AD-s(ErHEnoxQn1;=U3 zRdt`+klWWjwtvz7-a%jmMGYRPiqtxAM=}YKBYTBgZ0Ei+wy@=pa9y}Z>Z*B`V$2KT z*4J(+c4IDSCY08UA^g3;VrYx|2#~z#rS)weI?-dWH+90Sb(oMpGz0zk-x4ZA_7P#8 zs*opL&f}i>j`3}7MjC}Bog`nsly&{AJxi-bwVBc|I_Eo4C5?3#OA-9g+rb0<&O*hJ zwSP)Qg_iZ%pdq;`=l?|oRx!VddXE8%7CP#WO9b4{%guCoC?h@zP)fKOvp?=h&R+WB za=*7$G?_USzs7W_LpX@6ecFF^(d(p|=&EZDqP`se8;qT|Y^6*g8}(O;0l>~@T*MwD=nsCJ=SHZ4mBcFenDs> zk@jl=k%O2NenEWhP&@T|wVJw^M2E6~-uK?N*>fj*M`T`2)jmHJE|$|8RrOMP)=4Sr zS-WCEYk&UkwzX5IT3vMxmg27Uf50VmCg>-kPG+Ajn8I(yVQrS`{>i5S|6iI$hYigl1OI@@a#QJGfxcZ#L$dU<_u6ir|*xUDr1*|NTm zqb`{ph16JMswcaul_5Ve_@TRAynzAB$H3SSOHYmss=1_kUv5 z5jo&&M-if!){N~S{tf$e$nk!(YNlIqQYKe-`c9m5ZEeN^cX^<(@pefyOL^Xc>3(K= zAhk)?YOpUo$15WGYD#^9>00;I*GZH-z9#8*O1Eoob6k!{o;j~ty_Km^TQDak^tNYT z52LY#d}ZdvjL-UDLDH9Mw9CCwgp+<6`&~u#Y94oCauY^JD*2q3^EpkBQrjG5D76pU z1+u4BLMsGTH|RUzqi@YyV+IgB9hJXZ{}>z?l*HX6(am6v?c%vFlwWHKuiOPW(^_d! 
z3wA;++}U>HmamJuE(ToM_w1%_S!M7$K4F0lCd&=&{DCxmOXXPFge~`e68MM2#ywg1 zmI}<`xtFHyu#NqIrCFoIE&_H?EcA=2kAP1|g_oCCO~p*FYSNwM7soUhW49a*<+O}K zHf$ZqwMC9=&m9lgmPz?MKB%r!_65D7C+DAVDc6kc;9ej>JBd*0M1P;`E3gmoD&gsz zFrUM@FWebYqXwMcX(6qU3pUW@+&G|K`}}mEHIx}XeM6WNKMtazufkM@>-jNu=pIxy z*hajkdGaIh$<;h>bRk!!an8rjE7_Aj8~IepY)|1kyb+_Z*hMRplZg8XwSBNZcZ;0Y zQW#?BJ7wQLZzg{P!#AyFwp^eP8j9&(P%h368h`z5&Nr~(^NdK(zxHPWLbqFir0w(X zFH?rezh2|`Jw8>A^XD&oD)Q6^IEmI=e&oaKas9%w!^E9a| zWF6g1N48S?D=5T2jXe)!PSX!w@kR~^$KVdVyP@we0jn>a4Uvm}!NqAk&!GULPj?(@ zvcqT$>Mj2&j@lesb?!{djQ8=$3R_IL!^g?M1+fsTQg`ZPDVM{hs?YU8{Tn@B9fncf zFFi)>r*QA$Pc~@{cCRb^H7UAyJsel_xS!vuDCdC+07wUS6 zFx@Z_E_iO8!qnLBnV4qZ5O8L3lT6h-yNByTxx(K7^WAZthZfj}3y+xqi@A;nLHE1Q`XnnePP?J3eeb#KA)wSLDwae1 z`e|kx*@e`XPX60T&W@b;>KEL?FxIb^)iL>SF*^775cHtuW~IF3S}s>1_74TIGF7uG zCVKMbO1ggH)0(^oXJ2zUHY25)=DpmB*9UFy1KfN`UOMLUeS1qyZS){Y8f|@tpIEd)S)r#T+aIq zv0(%Dy`9Rvl)#Rz72V+cMR8ioq^e^25##8`NW|v8pd?rsxx_bd z!PNvqHcVO;M)%!`(f@@@4Jhv~crIm|?Q95pF9;SB^(g3UbS!mJt7IHMc8}7-@ z>tWJ|Hdok4PJ<`C=b%fqd04qU4)q6r*vTD8wV6B48U=&uCKtDk^GBR zlmoAw>HEqO{0V*J{ACw~t|7ArFZVqAG(lsY`e*)rvgyBk&*gF{Q#b3L7FO|1<0+Rl z;fD6ASyfT>4^Xnx_&YKE>*aE`Wpmx^k?N^v<;I0OG$z>nb{l=W@ukM6eJJgdU`w91 zQWndez38*1^n?r83fO6_>|@9gi#!Zq-#uW;z7V#aJFj z)}r_5Kbj}guR2%Xpgnt#ALaJiHsdEhrEBKxkXuOVM<1F-bX`r2#R%w8NGS$im!|*P zl+!WrD_0InG0v`~CUMexc$YS?qgrJDlZy4Rg6H(HR$_;m?r^PL9ABrWT_E$!^anLT(agSb%oZ795;Qo@cu0qJ+ zy_6!a0Ux0Q^pX#+zKgaR!xK=mu zf9BtB-CkdUxHzo*!c7Ry|MX<|P_h-(IPg z%=Wf~tRu}8Q)4*VSork1{#Kblv*|~PW0Ds6tDwzXZP-2@y=i}@ZF6vMz`~ez$R(A# zZ_@lIAiF!+sg?`!`UQlDfQ?b)$u%3I|9@d@T z>)MNC;g`CG<=)B-&sTP~_-}eA>7obU*nBfXTkq}qtIIO2?34bFco}e<`l=^aCyIq|3C%psS1Pa1<|w11Fbp8aN1nemvWj^9?jxIVsjvT9a} z;%2R==}=1Wu0*u*REIyW;=9GPCTMRFs| z+$xJbczgT!92s}n^ZIid-%bNaeKZZ7Kr?f(;w+WY4Ht%WvqY?HI-qX zkp*^FwfO_rnux7}tzkarY3?=GlDILBTm6n@rXQGzT1T>uCduY5M5Tfl1FT&2VBA^U zl(A@pH(iTqqpA(+Fqv4L^-29HS<&D4Rkmv~frM5qI|teMf_2G`E450~e~&ii^$tLi zZnd@MN4y@EgyWEF219-(U0FpVj#%SjZNqo4+&g@*Q3CB@y$R2t<+Wai9d`gb$_trn zpaFg)iTE#BD+B7aaqy*hN>2IQx-^Z45BBsk@1gp8I%xzy$- 
zv`t7<{4;cR@YpWrU10etE~?mJMeXC~BayU5mddyPYUKe_6eQJNEh8A^l9~-=n;D2F z5^7Z2()Y@@2E8ndcW0|`oq^8Wdgq+psME^JET?@5`rBT;cihE-%PI&?kGME`qva1o zL~Si)UQAgwleuTW;_XxilDEO1T0>NDIbS%7RL&|;izD&ZWkfU56IwbsOHnOrQLa_S z>>~!jS4hT)XE|hVn}HlX+6Ra8&W9zdarF;TFd2*c|qKn(;^CuK!vOsJ5=j(2?su$4qt)70+`uMq) zH#z2ojc%nfw}%C8xFRgbA<(?MtK?n2XBO+!eXEE9AsYj#xW9?;oC&S%!eRwbcg#iE zpp^m~pH#{te)8ii^8R!Rb?9H%*i}+OKmdyqU-OSE=kM-&Yu|-c`3GL8U z>%BVAz%>5RT1~e*j9+<9*{0?nNIU4+9m}&AZt&cW-9yRfMu})%iNvbx317+;=|9k~ zWvIU=9K7m#&`ol&%G_X8;Tn(}JJkFL-L*RcGe>>;mA2HP*B1p%bfSQvF}~(IG(@v} ztmL^4{M|>S=Y#Sj&?fzuyZxB{wR+A5Sh&IJSi0EEc`GBVy!;)ytNY@>@7gcAS2@P& ztj>lnYy#cStMV3Nl~p_e*4?${X>-a_xLmm_=GHNoX?*{Wqqz7eRiT7&PNY@i`!@rY z$(NCTcgstj0fYIt%!p_)UM}1LU zKvtUNg{dPlS{X6*OOh2ddMa140t?F>yUa5a)mkZ#%h*Ql| zuQ5rI3Hq9?(X2A!)z;j_>F}1`toeH*Ou4`!0Xy|w0`l=~i4q6Sef+d{WiNFGzVPU! z=Iq8O%UC~)wD-r=`ALN!b-y0>LCkGfsY#?#ed+X68Kf!9{VWkKlI?gzS8|`Q^?j9$ zX~TF+#|?+TBX@c*ND$xMVG(pt?OuU!yhYs*uH+0x+=1Vav}*xWKTwRY=*L=cNhgbH zd`Opke>O^pHitpVY}{@AY2v9_+Q&8(n6&}_lBSX2B`@)Rr8l@X?_ykPi5_^-ZvL>X zR?q-$%bGN>il=$khIL#P@noaJ=T8@NG7>xdXixyVu-p6O6pQ9)riXWoe#_{0sH z6Ph^TmD-;^26_|=2UY-PrPffp%K7@YXs7JnYgQ-nP%BU~qK8*SnGG3LSwd*uG8>0h zo4F4@@=zUS?)5txEjy&y&&e-(Agzv`10kLA?pV;L-I34|&r{cl9TmdjPI+LHmsW5> z8x5Mik0`WwwmotzvGLv3*ahD{3O)z<%u|n#(mBDY(TA6-3$=Y%eZ!5yZmg7B52kjN zRO`wMP)xrifHO(_pGj(tH0MmT^C92){kNmCS5-ZISlZ}R5EQ@6MawX3` zrTMC`6gHv?C114c3r|Q&;oMx%b)KWCd$#ll?x%xJeT4^lYyGQlJ`t1 zu;S?=%ZhiTwkqwebu?)nFWp9sxB6Jn(FBk`^Za4JxaUst{#hAXRjl;Q1lN`TKY=vt ztpkwLWp5E@!S8sL_f*f<_nsF3pKzU(oMuyIb1*&tS^5Khz(pnd4p*}X>TC0=1O02% zSsqA-WRXOkc7wMKW0aeo>GeWZX@gj)_K3TWj1e6{jB^6b@&z{#ih7uKADUl2Mp-*I z{vbhYJfcoplH1-#K5pHs+Up{-``15=r7v6q?w7%kJ) z)$3s#h9wmoC?#Y4GyW}Yx8_6z?R9iY{V{nOXKyC;Rh zn6&PCQ?t6VIC5Nl;bP|6buw3#V5OotCl!`}Lf*N-T_BNit%FhL@j;{usUL4qe8uLr zgM4)L;y#M)>E{QjYZr1^Y&cML3pRFcx^LVzKPx$mY%o`n=uZ6b1&lwKF4t%qAy9jn zs;-{KVX)%+7&BSBTUo1%)ru?-4HN?@F6{$0>OC-K(wSu96n@mN*)~KnJq{b}zFyB8$Rl9)Ywq;_gw-xN~Wr3#){R zpbR$!@~uj66|#PWZ4Sr(Qa->c`?847$v$oj1FZ^5%^mn%gPJ=JXy|TS^P+&~Gwf@B zp<}rC$xS3xUDH;<h 
z$-kfg?PXSf$-5 zDsNf4w^bM%RBB20&U0zN)268^>tZHH_k^Tyt)oKA)ymJTv9kP4#cOdmv2C5b(awFn zU_MgM-o0gz5%4fLShw3^8Dd7sXP5c-h6@r}NINFjgX2%LBfW(@@!hD7wls#6wZyQ*{~ii-z#3X>t0PumYQB>!M>BPyEIOT5=Uav({2rw zzO|sM;97mQf59d>L;b6hxjf;%f0!D}UNLXEssrfF<;iP#4`!am#b;j%E<}$IODlvE zw6Tp7D7TMA+RlefsRSaBX+!gc+r-&zrbQj4b|H^I8bkz?3PA-@A@o`0lJlgbS6l13 zk@cES7v_hr;kFSA=G}kn0I6Pi!618P;xUWZ1TjK30@J2T)!hQMVc|IdGR^T!S_^A_ zk(h}yWV=C%q+b=+EL7=fG zReiBNuEu>vFh0iLdq}`$!i8j3tgPolTmn(bJw8&nMVxDIGJRjhU6js!ohQ&&dmbt1 zCxrGKKP_TpDS}7Td$zfw0X9)V63X&brpJ;GUkdhG98>^vK2x`iJJ_)M=4=mgNmB2y zR6%|<`cb2nj4S1Mp{I?K7jzmAi^b#fgs9S>d=vJbc2;!w$tFsZl;q?_dme${-N|Gde#}mAY?GOX zz=2Eb$f&7I+}_x7l<2+2f-W~y@PKI9|QJs>ORcd;4&)5aFkGto+Vti*(A+}NGSUjbtaby|mLkwHfwR0tR($-i( z&c!vJc5?EnzfSNBW;nW0dpnL@jdK`PY6>6yD)&6!KWYO+ata>eu8oVnJsXB`(m%&> zMcIDnnIB=9Lfwl*lO( zP2j@faM9i@pD$eH%7A`s06)S<+4nI$$3ZwZoYe@E&mWFRYoX!fTkQe;jP@^+or3xd zJXT$5>&}UZoI=0~rw!!a@4e^QyCGqzcoDe_btbs9=u*2oc6lk|F@xZN{HR!O@LJbI zh1D%9dXo-g^Krd3Qjw-bo+5G9$J{C6CR$aakDKe*F3$1NJy;7uftH9J+i#o1Z7<|( z;DA0}GkotvoIc2=9wJRGc#V&GeTfua{w^jq4OtXT!;e&IbXXH2wMO=Eos8TI?+|_| zMT2JEMa6z?n(}vXH(#YrsFD^XZ7Y9qAuYBM3)Rs-9;lx7Y5e*TY;oDZ1RL#te+@E1 z>yzZS+XJ>}87g$Pi6}!S;HhYdfP>^K?9)#;mDy4$ZW>U8`ZYkE`sRGs#!@=lgG$F~Vimm}7LNiu;$wHZfk9?x$m|$(nP0Nm?M}lXX z-(+3?@|2)s+#~ip(%!b;w7Sh-y~Gwdq;wJd3x&rgbL;kT^^ChuSHZ}(c`Bb=oUUOf zLuKSyYWMBMjl;RJ`6Y4BEE3JTUkOh1Y{Wh%?fDe7%ZKb2q-Q2$J81ISYud7|?Idq5 zui5A*y0ka)XF{||}eA0|(q!1UeI z3AqdgGyp|JqNFYOkAh*eJpPQ&V#rW5;_^o=Bas_2Q!ltKn_%BUNm{z?qu9MZ;DoeU zW8Qzb;%BIrf+r#@&7XQzG&wgFxL@d76eGK~K)-^?+0ImV<>xhd8e&|zVm@C$x?=Ih z{Hl{0HPPB#IP5f(aCF;NV_GD1!n;RBC}t4S)w&XEC@%5d{tG1Rd!;=Yta+QWrYGHJ z?fPfrvmOrpIk^!E$pua7lb9Xzugj3_RQd(AbX|I5Sjh3TB)7+$Je8PJo@3NT4h~oEBs9&BvxQtE zR0gB8FT3YVyM5P;?ikodoqf?mB+j@GfSQ^Z&-bd2C1d{zEd+??Um^FENLb9`V+tk#T5C9=cz ztuI?fTWgtbMb0RqZziiv!^ZEcq~ygusDbB(+R2#HW^>jW;XZmeI9DugZ`97J8}vAf zbTVhCK243WTNR~!mnR#iNIdcwq_4(i&21~>-g2X5ZSp~s5T+I_F%Hv`IeZhcVYHzi z)WIGj3T=JE*5;Hj62_GqS3=-$#dX+RC7E~(G%8@&ADRWZt9p~=-QjPSEB2X!59 
zTq`Uo5qwD6v9)JW>u4;{Q~Ut(efasO32}{zMJiXku&Vg961>|wi7Syotg&>oQJkKl zBWrt)Bi@31TR`h4APOTAphI!f8OevR0oUT)_{fwgUG1IFnQ-K>#hRNv7+S2BGsr)5 z8+in&fDMJVM7T%DsOa3>g8}{eLQ`yvlODrC!X^&NZnDX4u1Un49x-*^yP~|A_nxAM zj>_tD3)iFwufYo*ukE&m+40G(Rc)`AFP%lLTql>$H3vM2cZ`%YySTfyDnF)wnlI~S z!VBA&`+1TMbbMR})PCttGb{HFZ5A3EWPK;OZPnF15I|o1Ydy9oMU=~SI34OsWbuxZ zfkLaFhJM&T2o|yKBf0k1#D`;)rQ|A+CHHl=?*Q4OD3Dl@G0vh0CH(q| zyKN|t@bwgeG&lfW@3RQF-@UWy(+%DE)ok$@QD#xzgAf1bY>2eb%iYI5z&)| zwQ*c(y$os2OV}u#h>a8!Ds^;rwPrv+8PSog4mEHM#KeqjLRktU-AdIiy{a-cu>?V71rr3Ss2yG-1&$(mm0nuy-%l$XVvIkM(uCI z#FcfHA#pva-FU!s<1YZNvK6A+oZaXdm?>)=5vYz)=10()mJ#0k*i}WffmBo z5A@Y2Ind)>9IGsA4)87HmE3fc)U5iN%EL8-JiY;?z3VQ$pFv<#vH~9~@^xXKeFpt| zX73wZVK2UT`K_$zQ>KqB=)yp=(ALOj@rBc%t({TuX#}FJ5jPzb+vvTry&aF(w&sI} zPk@F*nepk&3-W`##Hhhb<4x(k=e@oVdhtrQeXi}figIG|$f=ZW6F%4nDXJ!2rgvYh zIHm2=FPj8?K}}1+f^cpFJzX*k`g)+?VHq|QaEHe$S+rKy|Kab*w0O^eGj{!H;*RZz@t;osV%WkU((d%^T4%}w(T4lKnv*UhEU@_p{ zru#{)w>ljO>~yoUL&K$k6ZMCMS5%(1qMw}0xr{we5fs~yRT!O_M`zw zICt?J^=#x%PzAzgfUWJI28D99>=J&??>k#_YHv|~d?7FKyyKT(*Xc}U56czcp+kLf zGx(+ta$30*ccAnyTz++P`TAuTK?G$63%?_Md2{e?cktT|duwW!yoCgildX8t6h(-i zJsMRzyI+1B6E1(5j7kEOCnd_)WMOQE^Ed}&?c7F2d<8hm6peMNa*`LL!X2Y3KdtBW zycX6;aDZ$GR$!-6H!R5hrQ{o|#1C~ChskE!UKah_OFF`+hl$MtN&P}Tm#vQjupZA5 zJe9>gQq$Fo7!lXlkF2onD^rnNi`;lOP_t3e06AF7DS63y65Ll?)JIOq@Y@5=+1uvh z%*ksr+v53FiZXFIQ<0u?k-q*9~jgH0VQ0lxM z$vSXoq;!pt<3fD4nJYpv7sSd!VsEEWvITV+LOm1vujFCtq9`(%Q3j2?wTi1Oi#sGH zUz1Q(uNj3&R$bIK@u#7Z6MN7n%*oYJj0%pb@EX!h|J*I+!UWRjaY-U&vSMF z)|M`B+2oV0KLyx2B^M5XSqUnaKYk6DfalvNb|YV=&WS~SLG(b~a0`bhaEh+$)&_*@ zP>S#h7!1SFPIM7+8fQ`3#U?5_mv2|u%*1)7c(y9e6=I8W@{yX_C@73dRZgV5uT$zC zfw~x7o;6`qyT3uxYoXtF9dgJJlD)&mxkre(Fu3fiwQ~^lym2@y+PMKTB@FuJvK(q+ zJYb3O%$#r3&~Un3v#6olpF8W38%zAjPY;F?_qEgG7x;I~rPn?Wvy(mSm~yp<>dcBo z=<4d;mhS3UzAHn{{Q95(_f{27-0t2LbYQ+)j~9UUoLk+qNXcZUYG71xdu~U1m6v>g5N#5)X|p=# z^-0-Qpw+*wn!GvS#i{)Gvy<}%L<+~>uEj5l%@UP?!Uc=#Wn7>++rHna>V2RaJB{oK zNv`hXrU+*8%f&j4e2`7|)3)YsRf}CuOI9egqN?A(2^P$q(%xNnw&CMEjZPre17Uc( 
zyRT(&lqmcfC0w9v^4-&kp4K6mE1JdrO^SrmPV|*c;-9b9WNa)85KOOIln-eNt34C$ zwCaSEgWiBb9zEE8HP{j-G&G@)+Mbzi9i)eDcMnu#r(`_VzW8PdSjAE+88YIQU)=h? zsY#j^#|fC5R%)b1rbjD_Ogy5!-2!#h`Sm&*;7EyEr|^-7Neql-9-@7%lnI-?Eq z=?o~ErZe3()OU`&+;jsu#Ocg{iki!HtIh=(g>BdYk?X#=L7TN;tE_ zsk>se{{_^$?>0B(5k=4FweXOXHw4VnE-l_{MLna>u#Zw);o2tTm;7UBpy0;>f&&QFQfNp24Tp z>z$ZA(rD+f0@S7vZq+&*ox8yX8$&hq2t;DJg{qJj{h9+hd{0pES~8|&h`xS3 z#lQT)F65=2%t#Kj{EEF^I!6K8ti3yNAI7P$a4OmlJK^d9`tF|VL}46#A!&ab78XX= z=HM?!7~~}825O8{73NuE2f|YwRi)GKREHbioJ^}}?)WsCgjunxlAs?uEy|56i4~cf1yLJ}+37;;O#7OPpcX# zTjRY0qcUZ%3o+Q(7g$8pDtmZ=(ubmEQmlnV9B|fibzj(plG;I;D!nRWyoG(C*vPeA z&yDaLjune2?aYvOBH{DXvJY!h}m#^-0NvX(<(52j~i=u>1-*{ebO*CyuU z__1*%?C~pmsd~hc5?*^Q{ypd`q|-7C9!|k~r#nKV-?MysLwkU!5>gLDTNt&DJ8V3! zXskyzu#ypAV{&rvJTKVxNH}7mT$y*f#aa`V!j=rfH$W8%=~`*?zG!kH>A zxV;i~GHtI_SE!Jw|8chdJ{>9F!+x*wmKeg1p!DFtmRCo1qvh2QZ|8QqXQ)Hm`PB8= zxRI35S;>PbNy>{)%jd@B=lgpreG;F0fWUwbE|u(Tkyj?*1|dUcR7l(YzVW1r#YLNe zjc>G}y$IZFDoA1y&sK?yFH+try@2kOM2Rrb_4Cc^VBo_!qAL!zPPtb4*@|Cn>>b2| zgslJKNrH z*vU6n$LWnT$wOXZNiCP5t6XiHi$m&ntGCVJc}4loCD}~bC26uYBPHbKXYTCIDCOwk z%*VyvXx-X1{hWyv**w^+t!=*JgnX%7pBu5uCc2Sga|ws8-y3fF@lC^wSpr)S>K@;Kl{E~EmjQ-PNl)Nm8 z>4}~@Mk$q0_<>!mbd(hnww!s)p=T9xmHSqLi0sJy!-QR^+LJR+n5cHOUAhkr!-g~bi5Ia<3vJ8oovT>MD=OA) zTiTM{=u0C@?fjjbY6l#el<+M26jLpkkjmKr^x6k zu{`kv_ha^%7cSHi2Z#3OTv_mkx8ILsi@2(&9g@F7<5Kj2i|bBDw5Vw3LBz?pFg8?& zXGpp+)cBlIh8|@S?3bWW9vn=u$tnNX%Qh+f(emUnu7)r(Zj&4I?8E zBaPeraN)cvxVkJWgvT0ym&{%<1!&F0QU#)^j) z#?`)|ja4I@9t?Ub>WtkAtIenV#@x_Z2zoeAJT^wmC(z>5o2Iv-U9YZ%7<4NpfX3|w z_pP!INZ*QuCp?tpK_UW2M7>(=8Ji+U|5B_Do|1xz9=7mP!zpmMtcqJu3uR0Xy6&ws zkp-a+7d;E2%6KI*`N?%oVIgV&oh03JN{JROI#Im;@p|& zHM9&!6nIR7$ux904!e@Q2L@80f-9JIJz-;rTDvvxJJLvn=U-AnJ%lV+s-03=Ya$%O za`;JOhXOMP3YXKltx4>kUmLymz%yw5!?nhh&do)~HjH*uc|F(*kS#D6NLC zn|TGOr(yXKErKeo(aVIu8_GN1+Zk81*KfDqf2o!Ybb1pA8OKU`eG@KkQA|tY=c{Zch=rCBCQIBKq)vl7HH6ZHnRB!{90RQkjkE6|vJ^xUC4+%Q&KX zn7KF{7A)R_7rk)W^#vnnHf4g(r>OBkDlzM-nu{4IMCpwKYpbMov z&O0v4Kf<$XoyP1XlWZ~LMxWNcK%@Tom3FtX=UTJ}j4X?3F-P6Dbmm^rUUf=5>#hdq 
zwR8|!I})cm?GD3ub6e*G@P`1ps!IuyylYV!D!PZ`V6~g~LqCe1-&RWly1s%+su9x9FHcV~xnndP zF`~KLiqBKxaMOC*`QLW({fdpJD_A!Hh#afqzD>Jwu0g3Ii@NI&z9Dh0Y-B8{9#=M< z9O8r4y4=U99xk?l>P?&uGorJ^a@cT9%J?(K%rK+P*mgo8w&L=DwsAw$uJTfBBAbJL ztPnbm#d4hUkHF}erva-gEQO{^9b_ui90jR$evu7!rz1EKb^@_5=S zF?Cy=G<$AkQ9B#>xK<*Qtws%`0m_bACknha@z|~|fmdTb(8L4jL(`~yJEZYR7jI}y z(+u$KjV2YgXIy&IramExeqpf@o0H5QkaJDrs-J$ER-*tS(>HRv=!4>(cG?dC14P&E zxy%g5u243XX$zIkLJqwNlA;{T`=cQ12knsn>wK!f&FpY*wcxpU&Yh;fuZ^9GzOaD# zDGs@dMK)^$>b8uyN1Vc|YXY7U(3}vea&*Yl^#jVLsj~feNQP|U5z?w)wL(>CSHm^+cxG(wumkH`*ICY)^f-)bw;Q-v(C9xV$}bC%r!cP`VBghpB{$R{{1?gWf)1zfgYmAW2irr0k@?QCV`iPY>5 z-7}T6bh8Z=s7TnjwxD+v=>WSm^XS`Q-uF_%4wG<1kB?Y@kLdEE!&s(ds55LOHTimf zG{l+%{EDobvfLXbH->Pl!qp{$+f@Shy1ztM@N6qqd(~;Ldr3>SZ_CH-l(O3?&afD2 zdtv0K!q~RkMV0f=Y8cpDYL6wgf6v5cNK^U1s+|y9N^!hID@uuRsJ7c&pyC@T!`_|e zPvXXVxzznQUvy{I4f!D4xOVJpF77(c{}smXrTXb+!QWptV!bc$(P*PEYnqL!D5)MGG- zZ^5NFQI>Ezi<)BLs-iBk`Vm#QIA)yCb^@YYDXsxgB{oFp&=Iy4>_ns-9}(y_x0g7j zJDlA()xL9W*KodQkwuiJeOcDRR8kIz!W6noyRs6FHnH2<5^M)m5lrK3H&=Pv##Lc> zGuK}@J^TjjH4(we@)iL>mHJ~AEUh)+9{1jUn||hq$pv|Ij&X6y={cmKlGUY|0!EAPDG<{&|8uQRwGa66uYN5a8oWi2*J?Q#Uh?n7ue}h93)=`7?P)>Y$a~{Z#qcb1EgH$WGs?K# z>gs67zLZ~%i{JGfVzC8YM9M?v^V$Lx;v5;e(P7!K_}D_Yw?_Nin&r4ctiplBvrW66 zyl7{%-pQGShk0cDlX`hvS!H2Nev~2uuQkm*2zD61K+)d--_X$fxVr+rjJ#kD4wx-k zK#lJJ#;v01Pr3sgji$<$S6RZ0aajPHcJErgc}sH}N@ow!+Wnv!f(FO~bUB#?aVx%p z(IP6FMneR@h-5I)?EJaJx}B3IPP_i@!KW=zcX-Io)NFUy(N}G7Cx^L1?aD`4!7xQJ z8$P~gFq{3&mq=E0g>{Q=%RF(8H^{C&A@qoI@l3NgM$9SlwAwXFrRdIr#wuD`ALhG0ytxcBr5Z?^Y7sxrxtKuGaY z{8Zz}B1lT111TBbK$@VMN$-w-NK|K)P`Zz*w1@?{ffl|EK`+9l_rsqb8GvWjs}#W? 
zAEFuvdL|N{i;3Xbb(XejV|}MMMWXCo=Y&=yo1k^=+4LT2&0ia}_PlRbPzTk8NT({- z>lg_3vxe0DoF|)%55&*yp~RLLznIF6%q*(iO02^tSnND0RZ4=-!TA8BHMR@7 z%sB}UoWB!Srw!_=i_YwgIH3c$L2h!g@Hd@v68VhopfQiS9#Gb7!^c5vsycVl=N167 zEI#HH%bb>wA`m|oHzFul&u#(J%22=qu`u@8x$Hi0sWS=|3292T{5_;L=UE+8M_-P> z3YC>B25VNf+ngY0=4CeWqCdXkc!I#>UNAn?;T>060S$g=fcia^ePvQGVod_Q(m-#p zXs|_)+PSgX_% zpLY7ogfQcD%MY6x?Qi`U;S6xr0iUl|9*q*>76&CS%c;>fa`OQEkYxdJXSJTxL4LXN zTxR6(3GN5z@>HIC_#iAYr1G8x-EmGUy26@&JG=s2ci8W&^V?o0%?DQBQJ(4sF552F zKnNBYSN@`{2n^wcgYF^lmMG<<8-D?YYGDs_dreNR#C@p#v;m&y5Wf0!hWt{&T|5q! zAE?Lb3%Qyd?>)p`VbInCJtyMjhsl{#n2fygLarv(nG)@R{3Jt*>t^w&$26l!ZwIPB z3tI6_Qe-fJn%oA)Gw4qw8K8j7!cDsX>S)#gW!dQ<+V;DlGK!M-BoGMG00?7!*M;3< zL*~Q=q!r@=D7QePI30J;XTZ+n0U)+&)raW6!FT2o@Aag$+mMXISxA5u1>A?Dxf1dS zh!rRbB$X)&pmiA1-Oo2pE)~73vO}7dV!ir z-|x~z+0UT2@xg~GgDL8P3;t>@pp}uyeUX#U2tAEDQ72u|mCGP9bWVYQnh2WBYj-sjwT9gmDWK{R7_!z^cuJiWCK(TkLeCOgJ#Cxz%lE*C!bA} ziWFAhgSA3==uiVmd#I;Et(VqGp5XKS=T##6!pQ@rBv14r_DAzPc;M2rxvt)yfeVfu zY8f{9nyT7-{9@S4V`wH4k>>`NJ;^k3*YpNxR9!j#EC5mcGIx`6bWd1rgKm81@u!QD8GOeRJ50!Q+6Bf@ja~I zc*^bcs&S#?jw4IFlNuB|IR_kU;sGK9S}ISp!SRYne3gx=!4DuCS4ecT+{<;+hlLWT zzhk~oVCl9&*A9fy4Nwo-b&14bY(Ul`LI0gP(XZ4tIp^E6FpeB=RZTMWE-AZuf?Q@+ zD65*(AoL3eIA)1JL9NCW5#Xb4iFh;LJu9IWzuD}1=qlW34|htLs<>76zZ{|fcN zEx-sgR)bVuHpr1m36(pofblifywff-IQVV491ZeAu4a^nPYzKBx`R>#q%CTWE}9B+ z(62=`odfm0R=!Yb#6#fdf~R!tPV;!7f(P=&psaEF^$eNwz>2=L;%_FF45DBLs!-Pj z4k$DJ95h4356ZP_$lq(tw-!q2&VK^PpGoMk!WpeBDSBs*kB{l^9yTt5s!d3TmsW** zikS}HG3>odG9CldrA03=t%H6E^!}Wi&_H0Q*VtW%e0V_lPULxDXM)BXGn?L@)zCTa z(%E8b3X*@<2_!5yHZS-aEc z2O+qi8pV_Ev$l`eN-F^dw56gtjA=_Rj{rV>+n{z8N;rB5x;z<*&B-dWgSvdeb0Cal z;|JxkH!$&_S|)LxpK+}rS6|juov=)`^U6EoFHkw>rwjHF`?;fg{}X#~AkuAPk!|(k z0@WEdy#-lZ9*FgAuDol#vtY9rw@q>-$tHUlI%ZT|O{hy*Ck#QgsHnb}%za`GR$0b>wn#e~DY z$U+|>WdGT?+$f|Ww)B2mBc|HmGo+lQ9r)CF)u*x@bw5PQ~-y!rmV z>Z28ZxsgM_emklL2GqufPNQhXDJ}Oa68uEU0c3>P3-IE%+0Mfu>=B-zxv#7VDP=`zsl`%LlfE z?xP60@(X?+!v7n-92g?_8@_!tG*Xw(SQ|~VMNR*vw3?Ez3jX%w@0i&OurZG^RO)c^ zM(`8Z?*jBEh<``q3a{z*)IfgxmSE$WufDPJ; 
zr%ET$;AQ?7a{O`iNr!-_lLO47tUFd_=I?Ffy!4Ly@7U`BUA(fcx;D|7Sv4u0U$+6S zL<&NlAIp5dphHMxfP((KkVgDpuOcVbd+)z3hRy>>xFDwngBJN%?S1Z_Q2RlIza06a z7y3NcD?;gVZKZv3Qjso&{oYJ+W;`blw zB#ame$OZ`)k8j}Z*3B#)Uz6}C<*~uknCSAj>sC5D?pN6Fdc)f6rS**uAKWP>%NSVf z^cYSIZ#b}Kh+2>+P#f@^&+i!OKghGO+V%FRlPHMZt*cttv6#!hg)6ToldR&s`h5*_ z!}9U==n=w*5;E+h*|5(%XImtkdtqG|arE&rvUW8FMZegcj}kq#9*sgJic80{-;y== zJg61h7&K0YEAw+P^)mAh) z4SX<;=TZEzKW^CGHYO{gfKtIoA)C^EV%>V<+J( zUK`ERuhi&DY7>+FNkB$_pwn=EiuIjAf~to!R40K3ouQU?4RC%tS-?DmpjDuNPD8?b zzO-@1A|bQ^Mo1timXSW8?jkrzzD0{S4q=OtEPkcSvcX^s)i5e=PPYC1xttf@4_45H zpRKwjg!8qT%#pU3KBUBc?Igwe02;xCs)lKZNcJh4!5XgY_B*-V1Kn&?0Aqx7lPo;S zhIGD*#R!f9v(AL42%}e^t;P+9ZdsP|{=JjuL^hQR;A@;FaGGw<=U9tzR5L z|9SHey^R>6nk0f;T_Y+a2C@{>pSmPi#ay2N8iG7t^iRR_UBLY(4M8d&Fr+WU$d4WVL)QGfMWbj)5U7t=`~u-YMm)rC#>PK4YsCnViQ0x!8$4BK zN!(2O>zTo)|J{-)C}#rJg=1ND$gi@^XpJB&>UU)P&1$j)Hj^fW9Pc3t6!_w=0`Jpr z%l(e$M~JXZl%s|#-wspB`UjrYu@g6+{O$E$cWd=0IG2t!bM*JC*3gu~{J6mHANoHa zCigiFJ%f45IHejqWjMz`11sZ zkWe@Sa2G%D_TNhpozY^!L;Y8T8iUq~PYr{Pl%$K7`M>S~dh8oMhwhDdoCf5{0uZe@ zD*}f1R>G=!aRMi#@~sH^$pKK2#^&lo8f4c6o+ z(LP=Yh5jDF&8s7ms>oj(nZR}t{5AK4P!>QjJGN*RDI;6_DHeDWy_FMYA~RqSOQNtoFT|Q9BNGw$UknM6 zvQa8Q>wIk?iX+@28oFror;{mbqYwY&=jqvU-%-l@Olx6d`x0NUam}daTDi zxeP5s#Oa(>L0B=Ur$vK@??D0Bo}KPp6w#T%hiCz7ldMNk#X0PO+W-v)AL1P&aOExl znZVuTFgsSn)kB~h5e~YBgcZ_aevlCj3n2bR8;291W&NogF5V^PvRCW7)%)Rrzw(=# zc!@vXIQrqp@TW@<<5x|6iZNzj@1wA+ykC`ok4KK)%GiAjrczg1k0NTEF{KqG=K1 zMIW*tQIdy>W3={6KPB_WTT)s_FfzIhNh+dYO8~F}OMK}B-4d?C%xzWMzd?9-!@nCk zhey;<2e`*w=2r~8dJ8RdrDaa4>;tK3-%pS%a9qOu{Lj%3KP_sZ8{qAL!FCc^Z>jMU zF*SwA*{-zuG2T37aA#MN82{6IKM;$+`FkDxI%SX>J`LLgki}feUKWhgdE;{gSyz?< z&}O5XPgL^|MbUj{f=>SXsnF#Bn!+ykIUsJdcfz#%jkx^ic4Rv>bTT&l*8i&n8hOaQ zh48=KQknZZ{!=olz$HuQ%uTKjl~UsqXh1`y&W}qK${2B-=x6PLv#7yY46?O~0SaQm zazWb)=d`WMzshXyx@RP;|3^=QG_8-0fXBGjpwELi$HB?=;cYHu64UwGup=iZx&=St zpJw#$ib*nnae(EP5}v>Z~>6GjY|e@9LEpg z0>AxD;Ov=*LtN+7Gy;8UQZMq}cVqL5{th{G1pezE5=R3{iRl}nbO2lj-G~pr(zpcX zXQ}Fx69;9gGNRK@5*=~FpTPa;K>CH4&?VHfE2u`{2pF5m^Et4iY+q3 
z_dV3*1qrfX1BtHlnS%FEez!a!ZZL<>_YW%9fkdsZjdpNOwZzn zfOE=QSw!|~6jS2lFHrh;SL)%vBO@MuHt=zdvRj`(0d$nQiI&Z`9xF2I?Cs5heJoiD z5=K4+>P^8A{@;Gn@0(tkAUusI_G(8m4j|j5;wvT^r?w9njTOfw2v86Q`hhk-ssHHLXlR3i1Qc<{z~%-Q#h?;F@+8^x zw^;>{07vXl{`U4UbOUh?CMHYT+c2KCmH5X;kG`&@iMP#Mpa%#gV!>_+tZUAx(pR24 z56^*fWRN7PF7d$|_!EzwHD|l@{hS4>_;9kAT=zy00y_Mj79F1GPxVHN1AMOE(y@gT z4?mWh*P^!^)R78IwveI~5W!in*??#3|I^b2KD$pfh$ROd|5|hnhls7;O7+cJ!s%Ky zqO)xhSOO%zoUYbJYLuDH+B>l`F_--Qb2|!nFN5t*R7t&l72E$?dF1u&YssB z-1cP@dF-8Wc;87h=&Zmk3}MAeILY@njfC*t{3@vcOwcIe93>p6TvUNvgR?$I5~!3MnlBS>?6!0xOHUztGa0308lZ;(qODN)N_6KvX; zy2q@-ZS=32Jn+NA$@C#2is2x08e+9D- z2|*lQCp&)T0AEQQpn!*|-ykh(;ABU&vHXX+Ula6?o^wxV8OO?*KI zeZLL=5akA$gJ2xUyJ@|eSCEsX4{W>`_wRW`2^0o)jYpdu=}6g^z#||-_QOOkq9)|P zoGwFYQB4za!Oxb}6&T2k1y@)WOU%8-%6dha_6KMBIiSdT(la3VLPWakX&$6U2wYJYJWyBAJ%SiiDUN3o@9NkLZLTov-SB?o4IU-u#(7G+f zV8#eXKZR`x33xnV*i)WZ;e>?oLwquV6SU@8@aOyOSf8~yo;@j2=6>f;eWkn{=f=g4j9@05`ULEIdYPnIKEay}Kp1?Y*h^6e%%00AD|+EbF?l0qJQ! z14An5?x<|<5776G0)W76!09pfaNf>_eTyCtZRJFrKMGiY?G*Jt56;b^C~CiD6hL4S zehf`|HpB9pjap)OUm{}dS&Dw`bwm`Tb^^qEDFxpT+hOVvlhf&QhP_0}hJw%^)nF3H zTdKW^1!)2s_@PQ!!|4_LrtmB8tCU97@)VLqTb5BmB5_XLvdP(N-vxq}Iy#R3F2W4892)fUgeS%3>OJop*?xzNk#%?g9HoX=`mMg=}SzW zSoILIok3WF+NfA?>mL=QW^0I2YMSqABow+8DbfeA`x%zfuK|0^8b~Q2w_Of#^e3Xb z@hYj_aH`MnoBJB6f|;sS>`MKT3t<>*i{54!NM$cd6J9OEL(43M*q7X)(C|U$FW&FXzgV- zv-K@jrFdR9L(F;=4X4mDiGOQ}h-5gVS6+7BOeI=U&CT5utG42+1Bx<-?`tWe^Bk(1 zcZ1=`dzy$~pxQ1#_UMiczbLTWZzRp%GV5+|yRDX5lx=%1@(-ayLw1KTiIbpT<5GZ9 zbSufnnNibjRCw%VL@TIW2B*#8sLrOa)0UOg-Roc)?Oh$MIx~DKlV_a*q^g;;f}KMT zUTLB|D7%(5UngtNc?I%0!8+HyXgK~v2arcK5qF9uC=6xg_>5d&D!fDhav zGPm>ac@UOULFSibVJ-$-+o_iI5TfpM!M8c_LDvq_^xSq^#PI+;9BzbXntsuFm!vw^D3ZSrf9GuW!N+}KC=Yzf$c%Q6pXz^1fsU+45hu1tw zz&utLG@U!ebF>~hh`OX&cPxIO;Bu=`)&0+o#Apv^LoMzU7Wha_EK ztoV9k-)AQwG?j%QdBIfi&;jhHBRIQovg#AV>1p2)sbV;yX9HC6!8X}RnK_1n zuN+OMJWksx{dAGgdVA2UO%IfN1R>#?z(7CnMuG{DLuxz8ckY+tmYbAQN~W#4N+*KQ zU#uYB8hkxu9?B@+0l!N&jZrmS%7e#|*a#3un`RE-c_|2#-Pbr8{PgaWLc;Nn1%zNQ 
z4EuxLqA^nXBJo0E@xi+OGV1M!PQ+W0Z{8j#kFT;%jA!}ySet+ybo&TRn#u-@$&mb_=}zuh04n)U^knm0()rl)AfbksO3 zXX14waXi$0;&^GznZ9@%sU1zD#1+ij9+>HDS>4j~%)6!@eGTBAfQax{{Q{y9k8Ltm zKXE{tpj@5pNR&UAQy#T=q#_~9sCtkf-ZAH~r~%tC03a|38+fy#5z?)>12ct(??fD5 zLiR~hG@g*oxC;{Q3BDf!y~paAQW)fZ+Q$>M@rMOXaSWx}A|_Ww13zr%^ETf9W8!h* zn@vpz>lxJP^fS@jNKUzsC~wl2AIp3cmEMi4XDsK@s?31dd>emv7D$`$ovJnE z_o=64PAx*^w$}0*Z#EEQ53%V)7MjTA-}cu{?FPMWXX2?Wp{Ro}gdHk*TEIMn^BA&p zepDGrv^G*~KY0U6Zcvr)!A;VLpE4j21kE&4__V#Ch?`gp6B#Vt;ty&DO_}pcmn=;g zIM#KMQD0y~L=X3h&|cy~GPO89Iz&MjH8(wwF1>sBs2KSrTIRY3*pQi2=}?*DoHWQ3 zTG`0=eCEGl#d!qhjwGIMj!kjWfop1uN)MN+W-8WBE?v$$gFoGG_ax-Nco%CkmKg;s z!@yIQA!EYi{NqrXDDp7oIvISwO~m_(4#7u&U&K)c4p8wXMfjv`3V#7s0sjOBZ{v3t zRv`&(dEDP9iE95i+@_{WE(kHAJKFMzUz{03CtV!pq1^~Q;IKa5nVX%Jtl-U4t9iRN zO_saUXqbh*2dBR5fsHYB&LUKbf*}(e)~mo#@``1P5vvE z&G+8HUXhS=T{d=jHorW+!gGhs7uN6lD}e=Y)x^b>F&ey=;QNf|RABXjvA(wNjEut4 zKOk8YI`orG%avwlidp&7ab0{vybiSiNZUKo)u9 zmh5_We?f#ceKAR2OfWTW0T z*k0k|{vv?T0*EVy6Y7;PdpKcH#fem4R5bP8=Sn&S8oeLP6eeB)X?B~z3&Xt$Sv}oI zI55u`+wdx1CU8MANp*A2Gtf=o2uv}gI>XYj%Kdb63EY2J<-+C&;)q?bn^2r#7sJN) z3W%uK(^e8!!1^FwW{6VZIC2H}p$+W~FQxKZ$ zL=TUgS7##5R#dOuCgi?rN9@B3;PXSwJ2ad!LBTJf*Z!dKi9WVKxMFIUX{^kHl}lD z!RoPs{xmH1Fc`4)nGWR0%jTZl`JYD95~Jo7eRKv=l)&mW!R?!4X;qyD zCr`LYAk3~00qO(ay-&=Nm1ZfL<0mOUPi;wC?N~B&%PxD44VMh7tkN<;AoAXiI_HBBxTih z@euuisLwNs0{Tn~QwTJg6<)~qe!xMkhb47CszD}{z|}>~TDnv$;#XqcBD;80Y?QA| zyBxZd6^ag}K=XKP`^Lrxia`{_MchKYBZ=0^GjPUY!^0#G;DEmS*jmY_LPo9kf`mA1 zRB9h3-Ud1@_40cL!8831C(Py30fKRryuw|`^r+Re$px1j3S^OpI+wIccAWgDZIu#) zn0O?ih**ZBF672)t|d{lu~0_lVcw*&wpy@@8~+Po#>GbwKP;6G`H-^t7Ji>2VEhZQ zZGBe#N&%mp@^wQNv6oPIf~_Nqa=Uyp3FZu$-V*Fp;-V{d3ocE@au#ejjV?7&ke6=( zMyp0&Tgol3jDq-}K-eksfsXmnX6n9_5;#d-{TfA{uZ&P!`OmbiHuGGCK1;^aiB@j! 
zx@WGbxYZ>ADLU;XAVAlarmt#EUj@}fpx9m$%zmc9m1ax?-;mN4HNw1)6t zxx5ws`Lz?sG76hY>n}!=+8FT_1RZhFfIMq}_=_CE&>Dy+8Bh9ksd1CXuEcUbDZN+n zt9AY`KkZ8rh(6>zNGvUX82VV4Z(RiCq{Kwm$IDK}@-na7xjc0>A9#YKTi3fL4ggv| zqGKDxWWbF>uX+1yw`Uh9x&hfL;{!#w+BVsrUSL5t>0o;w4Jv( z{~|I<@Mg-DBI*it*OX`B4UmnoOy`_nrDB^k*nBN2_eAaKbn&uWYkWsyJtr3_yYa8t zjJK#K7g50JNJTRLIVl_^FvYcTQJ1bfVg)x-%(9_ z9lN;&Y29qoP(H(t_k&Yp{SYW1KQ_q#S+r!_zcshU9jO0p02?B>HrDHHa#)WFxYJg?yJf4GAX& z9??UbEz*lYyuJ>j%Z zPIxwF>8~a5_wtXen`Vl)wZ6@I`PZN8UJ0i@-fBZNa{AV<&-WRFkR#MyztQWQBA?Ez zZC}KYuRdn!mH6MYuBIltI{w$sD(3rIp~>H-FlxrO2WHR1dZ*whnrn@@A`EpT8r3An zbtQ0Gro>tign%sOt;iIQ=JnoOx@1PpOZ&kEc;TwN9B%#nT|}NC8Z*3>-czS{3;?e# z$bmh%tJ03|MLqo2C0N0UBB4VMRXO=TbHef=Y#DwJm)>#G^Gcm!Tmmw$ zK>wGYIN`kS=6rD(|I{tB)qMfWKeleN&)+r#rxCR|^*;266v~mXa~oHtviJcCpKAX+ zaZ=1FB04IPShMvH_=~_zAfU4avkt-`+V%4GSBGG=h#Jcd#ZzgvS1Zp~k;jIw{IN9J z^)LH|Agmp=q9yQQu4-!(34~_t;wQmKK@rBzjvz{za*uMfcP1IZrzs-g@r zvjGkDKW=y-;UXfZE5dCd5(n349u;lu!oN)o+k)66E{-$E_yV=D%bRDGpZ)h!D|r|o zw*)F$a=;eFVQ@clz&}r|F930D+QXON>Hh%nS?4G0;!52QevJJtPGX&O)fBZ-lP?Di zD-&EX0Qgg&z4OxjwlB@&KcP<5f?<4;kzED!i}{t+cl{gGx9<_$<<#a=xe^N;-9~Oi zsvi&%tyM)7{7S&NTRKM{7RI3}|GS0O2BX>H{(^U9!mwr$u6`sY6j_J0mI-$Vv9uJ{ z36MiiNBjLpL3uripaSNzpKm9j0#@%M3W-S9aM%jX;l-YS)-?OdpNm3tDpvj|iIODV zL~<<&gBfrFf-rsh!8$rHu;_99Gc&2h0@!qAteu~=aRq*H^rOMOe0Yr zPMDHy!oF$*BHrSGMVwJLoi%oiB4P>tN%@4zo<(W%X6e}`Wt2-Q8%jyKcdLExsr~O z$nNVjG(uv%B2)s*spycO&^n5xVo+lwLPBKX72YjDK$zJoA(9458OJ6XZ`%LKPELq! 
zs@2TC)CLuZwKON0$MQH<*8B#<=c*wR*yZN~Ai;YPim>{S_3Q|UI|%A|)cdzz=Vm6B zr`8d2=y{2(^O)}!I7jpBBcxkeDROKf@pS4RaK51F=$Y{%=-JJHoCfWnW+}2JP{v`% z(*Yt!OJ32Q7r&effTH%b3l*%(!-sqvoi`_O%tzIsO9*txhAyVXw?BN{t9lxmeQ{C5PQt4O~1HyomxaB|_-*G=KahV5tI0EBe;a2w37 zairx%DRT7Km*AEH0}E(9S=Wpm#sPrd?Z_xCjg#&4q&w!lqIdE_hQbDbD zj$D0}0Cln;aWlEv{s8sMLVa0dD6obo=&t=+g?Nr8asoIb((U9S(to9Nw(Iu-()WCq z)bu>kcAnTO#ZY)Ws1#|=`d#fOa_sLpBzbeH$4~JgoR#TRiu4uz0&RoKnUik8DX^sv z4u`81^oj6rghL;#5s%_aC$3n)QE`8qzS=%H0nKCc&xY>t??z5se;=jz8R<#?r1{9s zWzJbz8@+f}GT03QT(#lSn)G5~nri_}2VaYV&Tq1_S_LvyQ_xQ(w_)Nf?JS(*uGYHu zE5{;E2Oh<6SWkLIPjv#5wiLz@lbwG-fI?*-nhGym6|rK~G-hM2TDfcS18}-Z(OvMO z=U9Ettr@GD6S*Lg_coyH=8F!ISBPG99pzZ{mTIVV0?2w~iTEQKJ=p8CpLq-EL;NV` z;2baW3axYq1~tiduJdkoai`LN_vyHc6H3CQ18b!ucVB(oqNuHiDalqG;yvJvvad3N znjmf*PN3urIJ^_srX-lEeC@N3AUnvP(oAW8LNVrGyB@_lk`9O& zcAO#g;0XlbzF;DGXbB(*xls+Q{mPXc6iD|*(_(@7GB|?b`xiK-(~;{Qv87ft^vBh> zj7C@)ho%Dj@#=(kae|h9Yi5t`x=617*R!U^r`Qs|+Z z-S3VH`{NjG9b<=AR6401ri8hgIU&wpAl!n#PR%2y#|gdq>0V*+gHw0968}}FzBOA< zMi)5+cT1M)6DsL(p_n6=h#WApA5XWkf)vk?ES*AYk{{HJ=;bU|SBWB+GRmrocy0`G z$cXK5yE1Zs3=eV+Xwq#sPgS6gS|D6S=v62j-%*O3SO0|^PPJejg7X8EK8)TII09`1 zIkz~UMf!IMkK(q`_3z-{>oW0EvumdLT+9nN!tNUN$PLwe4FvJKjo?^o-4OCIT76@WTvg z!r7A7oVl?pVlvTii%|Oz)vt7(-#=Y`s}>*!LLi&majLJ8w%8)|UaykOY!a^2hM!sN z9M+GWNWXr^D=-w!1G_W!0Xfll>&Z{9xLGc9yDH>xn*nA|u`>hmrIK(=y=d64jSqPG z&pJ1aw(Sdt10We}V&>-kqo-zCik(xxgq(1- z$wgN+=QaI9q??||7lW>M&f&$AbMprKYT3;62ThLi%$5+fFdQ6m?x~~x9@T;`^qX8- zkWE_9ye;96BEyK zUeq-*?HtkTP&4=KoncXLdhPM=qUWNsKUhAp>8~$^Mh?7)u}G8-;cw2fG49PY?Z}cj zb;@?=I?^s|=LrKP4y$kPf(TA6DY{*PWwxC*s=Wmbb_8R+8ynGyTXylk6T0<%NQ=r^ z3usGb+?*H`_eZAe>_}UBOySHxLR{Mh9?p_cU^u=^k(Ojg|C}n5Y(Y0ExA2(e55y;! 
zd@8|ZS@+kM%rb94Q8K%dk^HC3TJl6v$MsXL&j^Z)^d~GkWt}L+LwPJtyEp7s_!%(O zHI(uX_}K!xi)O!Dp}xF*yz^_KN?l9&Q~8%!yPSepX|}xAF-Wdtx%1AJa!k4R2yq#Tz|?wDq<7LYviMOtR_#O%dtU8HCk2J zuqWPyH|0`~@wJ?`big2qbM)^-Z^+`1f3oqUmhC5BEH?Upd(#T*Fkn{GD60#ZS4wQ`$E<7!JMbt(~?zx+L z#=dOc2O08qr98!ASDF*mo0BvJ6uV?T`p%@GPI{gELfJs6-%g-m=E)yAe)y==yh?9C zbBqioSy5MIpQ>|MZQkA}LRCg$2|wD29V_!n+i7OvSX0$-L7iaiLZa1$ekIv61eF%r zo*(T2l?vV=ZnnBE^ESOS$-akWv*HwEf_3OKVr)6^%DtJ%j1~HaP{x@f-IK@b>K`5QfeKFo-zuCM{J9=VD zPG**J>J8WmXJ>0}@LYI`}NWW@)*pi^vWgSXp=qTISo_fsAo9STb^o7yGeX z)W`?oe-c-+JG-*FK=0ZVtGQf-nlhc?R4bgI54;bSOnuAmx)P+Je20Q@p70I6Ym=%;4_#g2~vN3*KRf46QG-Q_V-+Ry={_1e{=vxl593|a)>2=@$C2f8e@VB z-ENlqQvC&4n>Ghaa$R_J+PrX5Wne)3$k%|bKU1yy6~<{34^WamA*r>l61Zy#dU zO_3kt#prXFNxH`safn4y-U!;fwDi<$d&o5&E-8o7gV<56V|%y;7QevyQN&U$s|{~4 za`YMYRu-@PSUKyKWNp)r1$Ok!{{H^zNH@Ji$!z>g{Kq(>3~q61i8~v&n1q;SSv-6n zsT3CE@w)*uvUoFf@!Z~Lf>z(UZ*5|a9+ZE>#C1&H#=R8wc)jD!A4 z4ZP389FgjME+#ejvbMjTqLab;;yZkgZxI=42(>)!CFaHLprSsVJ@aYIja0!A*YiD% zTET4#9S+~*I&ST})`#?*90!fwzYuZnk)lc|Ua*fo__fR{C4;*Y;~D7Rnp~3q#?eog zkE5l#!k0x=WUhXn52A}i)h2?O1k6k#TwtHZ{*mI}MZH;$rgdzObo7m_^Y)&xzis;D zgT3cFl|BhXXIlke$eL~HPMq7iE4gs0eYd4A{pyHupHSV4<1R@ddu8PuEq;G_)E8x4 znf4Q9m0A<$&#B5#V9NBKgO5y#rDTtF-MON{FH^Uhk6nHHt0v_xL}ocUp+*aH(>2D& z=EgtW*(RDC{7YZ>ASQ4uxLd$};(G1*dUUg$L8ZkV)zQ$sG4`N5derPMr>nY0h~~|X zxcnY$m-B3mDf*OmtjwR5RVvk}fzYFdX`^8?B4_ei9@C;_@IFlL);mTa`Ld1Dd(-M% zh@Wu96#Q-1e2Wb*b0c59&gvL>C23|SXVe(Gi~4Bf+`K&f4G7)No2NSc@Q)LiP)W1d zjk`>|_lsHc&zc*49(b(#y%~ca=g^;@Z28&lmpv)@?8r-m#0t~0!qZVboTCZ#HRm7Cm2k8v3JV7sH*)2W`BFb?P&Vu-p5d<3rK+BBaUSfr z@`#J$h?w0uoMiFBjB!-Fhwk_LSKHF`!`&Mb)f=odg$1HEPH^cKefSP)&hh(k9(~mm zoijW=1{{rCx069X1f}&9oavi9+Pg;qAuV+lS513hPP!WmHr-;5eTJtt2m@0wdA8&W{j5`|un`-S> z2jPR^I(>e`Q{QIkFb$VQsxSoz3q<)U8t)-hmmjj|_uoO&-EY!u#S-}k)4 zE|TH&ecdk}2EX&(i|(>BXufK}_eC>{MFcsewmI8e_te+NCruhDy?0K z>gfn)-2Q~yB!zo*;r`Z4RUNxSieqELOr-@QMoaSWsfg_a;q3~8N&(E| z=Rs3t2c0ksb`$aIs%?BB@<^hBj{3y)I%dtBo6kF;KR>*qP=On-tbW-|QvV(V2em5WftTR79U(_AyETa231z_)<$b=lYX}Te zTKE}n`Y_Bosbajb`=edHdob0~^8SXwCmhJr0 
zaWE;n-IaX9P|nvUC&JPVBb>6j68)1U8wk?lVYgbeZ)LK0olZMl_^gWY_`CAl#Ka>P zIQUss+>%AwQ45f_)7pTddEtCFcHfH7BH|+V2da?qFK!ZBU+HUR6lRRx@=?$gTu_2p4gVq(snoUBgU4ve(2tGPS95rgyZ`JWg~ zKpbgb7VJEK!}VHT*$m+sEw8sA-`T1q4xpwXkH;Sbv}suQc9dI5yad9uEzFLXJ1=NvnFN7fUM{O}xEc;s`C2fx4du?z_aQgkPP#?c zr4TK63!{B&xR$e3x#I(2&#e#dO>HivP;EbbI2IBnoxGDHU0p@f-*zR9@zI>_ee~es z^6Opqk=HK~4C{fSsCDpS#|_xU-W zdglr~{^Rl}mKReg49t#un&H{?KQPPbqc~NQCkjVA`P<7+41d1A?|W`_n|JA11Yp*^ z!7uGPgE}>vM+uWLMSkJ*>_{2J{@>?`1L> zd69|Y1Dd~8Z^O?mdyUV|4xRAgiq046yQ+CdVvp0r`)1HH_%?Cy2ha?9x7N%F8;7Aj ztr{;@#M>dAu--vd();nHv`DiEbP7Sb1B`7(#~qUo&a=a&pbfV8xMu{R#pTbV&fV!{ z;L=b!FSHPxH`%N?D;X#f4~M&ynZZgQvGhT&%mrb+ayCe^!&K=&N1VSLMNPt+V~4Q0 z#KB#rlnOoh{?;+fb;*Dn=lK~1mDCCl5pOe9lMBo#D3^t407MskXqgyHDgKtp-Nvw& z_qJKN_kMz`dvtdyd8P_tvX~@ zT0)58Jmc+=!k;Dq+O|LVh6~1Pnu}dre%r9gRoXPGmmNnJ5LBWyv(2Wadn00!x!Zcc z4LBj!amu<+uR;-A@Zqcuv-DSYHg&YVylPq#DhMfrmNpelQ~Fq~?dudQ-E%T!QRa;x zA*G*~>nTX6M_rym7R};{{js@V`dbJAp9>1Sxdvhu?3P&N1eLcFNDqd$yg}O=lQdgy zG{!z<-`PRq@wg=H)<+{wl|&CIr|tA@pp&(ha0FZ*|(3f=An&f6IIjQXC^` zmpq6R#nMs4t|l$$5#`&OquI9zEzzLuTp|Fzyd zCZ`xXfPF3qq7uNu+_p|kF#Ah{so5T5JF-ljM#NdTre%hFM!|O9RIlQoc)>N=q^c{D z?1li4x(Ae`=G4lV!L%txgoCBct2zA{n)|#k(qHD79kY2C% zg{?2SvOm=SEE4y?Dp3?c(lJ&kx?r!S$xHVhOW|jXO0SV#kp`cd2Q9K6F>qA|n!mog zncw_l!Q=ibA+GmURPV~mqtNF!hd;8Wirc+BwiPGf?g8qb=j|u@E;HdO%ufk`Ml$Zq z%k1i=ug^4TOM88sijwY8$yiSni+WQ{*eS6zol=E3s8Mw_XBxK@DE|artk7(ud%Plo ztC~yL>3Jw*YugSR4RB2w1=?~^N)wmplrg$i# zmtM{P=$i7+)~nl2jTDR@P|v;+rxd`gohTnEY8Ur?ZhCxNf!qDSCq>t5%d=b_!fjae z-b;!myJC5k-?lQ_YbPLECoJJ-;BG(xxQxiWNG}D&iBtJX89GzoEQ%Nw114`>}!z zM`KUK@3|NV2Wp=P&-7si)kk?oQ(We&ARsWqj7>&xT2(?O&Vk6v%t@~rrlqA_V))?+3?+L$ayT=+0HzO zJWSoh)CAm^qTImdc}`5@ow2(X<~N+EdiiJPtq9tSF*A7wFDq}v!CYz~@Y2a&m>D#M zII`ATX#Sh1CzBv~W&TK!!^~ZiRTf8#=K(gXKPYqPycf>t`BjROL@GzO>z#p-%PF8R zbN#I^BRzKu)|=&g-dHs$@Al`tEmYkt1nFIxhG90So=XQmS~p13rtj2ks_DMSq+V-I z{f;%n#3!_Ar$0|e0n_G0v(CJo8*VBU?WI>Qw4cPxHopcfIMR4-yq`658lssgDTT+* z;hsNQZvwnrBS9tUxV^)*+MJmWR<+V0-|o`Nq|QtZhPX9bOBHS(3{1{!)}ZG%&mYbr 
zTvfXqUFB8}wEI(EvQ!yK1{6X7NU&0x{0O=`)SlhHQf^*pTab}vAOOR;f6C) zQ*bP5D$pfpAUZ=w|Aq@>Q7zo7cbVlppWcJEg1#QH-Yuq=J^7kbr-nN^GK?C)`f;BK zi!kf|(A;m|0DT-nAZxpYiMO1I_xh)2j~A1S?u>d~q^$qx0CqH#3X=d74n)VEBIO9b zJlDN#C-p1o43z}Y6=2G&re+3HI$)8Ef&a56``-TvVNckJ21o#yc?r*4$6F#uR&2j7 zt$;~u9hl7C$JR)4<1pDM7(@Z-8@-$<1y^Vf%VgwQ^|phljRt$^_J6EUPypWMN z*`7QzlHKJ@Kb3^`q>g<*4_2TrLwDLwr^gY(jT#}-hL_OGFgqNfqCzJ#9BQpWL*ke<7tocX1MMJ?r6rLX)c8HOHKegvVbkmQ$ z0%oDKbA*gwRpw_|?A5yGd@6D~do#C9#!2!NHip}M+Zm(NUR)`s&M^zg@N)LnQQhA& z0an`YbAxi;vVA&Y;cTP}y6H*WaC2upMrdnpDteQtr7S{Jx7ZQAkYL zU_jgqV}Hic@62*8V9yDI$Niyo3uRvWMt=3TA1==RXkVq|1@2-a;sGc-jQ=cl`&A>@ zySrgJ?rS~7)LBHisgdpIoO*7l@6$J;uiszG z7)IM(ENbBwmh3dvQ0^RmU)0a4!z;o@h#NCHSoi#nw)J}vTS%J-;%+0plF^&f+IOta zROs(>9yrIeNacqbu9d5<&_x+N&+;Y@4AJ<~6_RZs4HxJrHg4U$BQ~gP&#h2)YF0T` za1yc1Ws%ZaFlh#yB z?nF)l65%1$vIfCc{J9Ms7G-Gf}KFeE*Ue> z*5@FA#_T?_lt*ABZ(oV6nZ0Fk;2v+{DALHhK`2YuSb6aZ1vTXnq-ch&`6_T{FM3#j ztDl-Jl+5A$iXXH@GMpC0Lzdgz0SrEjYv%+PKD>YzzcCF*+q9M7Hf(6h0zVo4F|ciW zK!at@DS?6&(`nz@-JZJyrhIp*{U9?Gw6I2J!57khb$nVXNh~WI(qqDkzJHKYNJzU*tyXy!qYoszTft~w=PxnGVCHAIp_M-~GwfkuL~ zlG(EZv5~0W{TCik)y*ZMks5U3IKkjF;#&;0zGN^6iksEjxpw*RchT2{P?Q(X1Np^w zZ{%3z7FVnz`ua|ot=x57T3TAcutH=xc$RjyuQwl7gG`YLs3l%cYi01=I_w9NOu;#{ zV&yV+ijfH$LeLLmD*G^^WVnYYMy;E;GovLNnqOWOOMv*T)KmDG^;TUdqVXFyAIXQX zIMJXwSlIIEa=3o5aT*cueKb`KgXt?%eL}`Cp%$3Uv|&)5~x0 zj+p+F4nN)Y(RtR?us*F>*Lk|fE`bUSuhqv%3v@OD20(o9edK-59mie^&y8Mi`0UOY zI`|fxK64}`p>5wEZ41UB5H$=TyFss(GgJvfT?NDb1Q+Rn+}JO!eOxES*+5*YLG`9PW^Lh`h{(08h4_TA*RcM zEEBQ^<4y2~Gl>jxam~PE=+Ucm6x>yuJg*9a%|M^K!1=5&vv_ zfo(<5V>QU9R-<-mC142rl_)Xf_gH)K8!J4&!u>xO@$ReqzVr#H6q+HGLPxSz#Cs&U z4QoVMCqcS;L=N{TCgZxeI0zi{Y`dMD^Mo;T(MM(GKs9by%dT+Y7alAXiI=+gkW7&9 zn$JD^{_XLrPr?yxFp=JKP%rE4dg!-2QfvlA?B6Vst}DYjrP3`81S)v1a8r)Fe%0bE z%@?~GUo1ZuIQq@fQ%b=r)Oxv_ec-_gn^TLB50EhZ()a5rOsxn(#1(rzBL-C4W^bz1 z2`zp4JJMjm9%Vq%eU1_)D)xYvWIW8&+T=Ixt&CxLsn)%<8?h*-@d^iFHN78^*{!Gj z@(b3m@8U8=B3{mf7Y*Oi0xu2o1vhqn-PMrFS{&x$_Z~>*@a3r}-o=F8Epmyq+7PZH zSwoU%K}vQ8Ny&b^zZ~E$&k8AmWv(5^x~EVo!$jo-*};eqpZIz^e)oUkCp-s|M|dpk 
zaBfAImWKOJgIr>HkEbxrV?u$vE6F`dRo8lw=`22G4QYs9v1tP%Nc?OK(5W&ywpPkW z_>6R^U6)ys+6Ugb|D3`fB)3gdq9jKvIJ*Pd?^d zQ?4F6jICHRM1&KmUC1>SGhN7Eau1Po#TWUQYWUoa%GmqgS^lUYkjRoXifrw~aE?@R z!X#5)9Kf<<68~}sTb^9^WoXHHJMqVVSP*1Y4scVSAJuxJDL(nv8z{&hF0sVw3`m$m zLK0Z7Ug%>6!_v>&Q3dPt4I~x*PN^6{;4XPbQ7R;s^WSH+OzD-UU_T=qJz8E5up+ivp=ce>(SE0*HsTpptV~%S*6U zB-U%q!oDeXhure-9S!H#-hz`tnGj7XDL0TX0_Z^zUE2IW1M7$1Dfh2-}R#W!0{icZMGBbR?)dc)$M zZ;H3wd@T5u_|Ct%z{6$VryW+0`!`JBJSK36j~;)u_sDkeF_N+9)iu4sj*VRXYw6Qp zRcE;h(AT7={66+Gx^*p4P)2^QmU96JC$7;7>*VL+MLhnM736^*y%CpA};@gEc%a;4ZE*CIYzQbr5`uZDe?pQ z&<$N zO-)|^-&FkyKSBqZQLP`lId3cj)xd1>kq;0{=BpYMKCScQn5i`p|Iy*P(GJurbU8_= zM^txzy`K26C5yZO)?&Q(;+kxh2Z1c}ubF~X=bA{&F~r-|QDdir1pwi5T&C30JJ*~1 zXkL^A*u<8*YKVMXL&HHy5p}v4j^tPN4(VSt;P~HF8OkoL#|9Ht`c>Opp z>bf%h>q6Z<0;4U*gP*y${+B7`)+mO5;sj>7Ot5YAuOpoPr`lK?(+W4ekpLc2+atrY zvh;TkV=t{A_+M=)rC%B_e6-_)WWw-@%dyJ&eyk3-GTFhdfL~Q0=sm9D=9UPhwvbI!R<)q;XE~-@_!7> zBi_}mM`7?=7B}CPB}9`FgWeXDJL2dbtr=R0?1-#;lTi#N&JbRnFg~4avvW{Sr#{sZV^+qnLpCjoSdU4M3)cCFINF%}p zQ2z}rGgh+o{?XQ|^_Gi^aex)n!JDm=O-u1M?l$Gc=k&g;(w0dZ1roH>)mHN=i(|m* zksn`2bh>O<=2E9QjIzVjiuPQ&=I`ZmIW+Y~aN*lGrmv3HVmlt;#kIV@Hk3tv1U&1g z)|74cMSLo1Xv;D+R+QR$jY*{I(n23VZ2q;aXnK+y1G6(KgG3E4J#XDp*i;ZPn=F@y z#Az&sdklo}-+C9jlqyl?Dp~kX0tHD}>eGfV&%WhFP^&BJMEvvzg1=hf zeB!4S)re&dgg3q_5MQwr7caPKtmluv3+S;rU_7cgdE95|;T*j6EVZieNmy+;W|fmd zyoiSPFOrvUgCyC;ElfFtaHv!d+N*42`0LiE6`ykDp2tYc`{Px#5foXN!ECM@^}h|_ z7vu$MQt0|g>B4M}YW$x(TJ8(vvU7`81ClcDA|G4#PEc3>7yvQ+721wSA+<93fcsi; z1d@@8H3`fdd0iWoH!ZTpcsUeOl9@6L%vaE>Q2!nm+%zNU6Bmn&V+prZ7RuH zJIy}(-NQcJ3`p45x#3VyFrNaH;;C_wW{P;EL<9$9OD_Q}aE_D^>(ynpR`?#yPc~eE z;Ct)h-bK@%4rueBTO~4@8a8RXL~0kwx!;%u;XJ4?hwr&a<)nAgN0R#`K=nQX5W)~D zVVqdgR*@o#dw=Y_0+qF_fu@9xt*3_c9EM)rZ;Dea3u?)O^nZs34a-=jL*j5}ubujy6L})V zIHV@v{$6{wlu7)}wW5OCPK|%CG-v8OSb7rRPB#a3p`H$~)29+)+W@~_37NxGo1tlU z`BQ!pP%#|2*;yk&@p^mR?3?HSog3SPf8o!-ZlS3!k9gV>Qi=woG@55C zhO@iO?A?y3{kfbAaM^4eVb)UsWtJ=`17}P33|i|r0#1Gf>h4;ii<(}Cw_ z^}3SGCUCxNwQE<(FZe$6^}mS6iZa*_B{#+ctRf^q7BvDN_TGj=AQ9h-X3Ykn4<)j% 
zv*yfCt$YCNCy9COwFRV9D_|cDK!U`CZKZ=KFUouZJaZW1>44Pk5_Gh4=SLMN*Qj#p zeYjZ9H2Qt&8VZT1{+b5>&uRl0r5+U{w>1Gx#n1|1KIxPPHV72Srmya~T8M1nD(#VV zaidJ7U1mu<9E2L4fUzMu7tC^%sEj=@`h$3}AF!{(VG7VH?XW}EboA3JWFrc))#MCp z9he$QuWbi#z>R*?fe-QVWjY+pUqA&RxJ>{HH<-YxAq8Tja6Sym^;VSuY(PpceZ+Xm z2r0id@EX?aK3wtzRALrAr%KMZ28rSE%oZK1fdzoFw!^lyVXw0bb*wFyI+V(Q3p)|L z3CFG$7f#em@&g28cCtlRdz?Z&?l=K-2#e_Bn@2`+d-ociy>Raj3#p9X%ssi$O%~Je zFa()5)fed%Tp51S;)N3^q$%LFJpBs$u*!xX4bI&KfQ`-2kDC{=DCsJ`AXw>{V+VMKME!(8}|RQcO0l1kBf*a9Jmi~+vAfL z!aATDADS+!9&mU{FkSBz$^l?>p$mzFZ(*R}{4q7Rv^N1pfZgyn&?G0_JgmX!guzRB zv%Gt85d$BZ`AQRZXv@-NOb#TZv^)Lm>o;iOy>YBSiPCLYjL%U@{Ne+KKMHG3&Qtc7ontagL7bKeI=Cu>vb03N%L zCy#1)3e5+0!!8`#_MaS?>a+3@4Z=3sk}bVHKcnp?&rR+cR-FFxWQ=lhN0imYUcZ3^ zD+yCaWNT7yYI0nzR-|?34ghtH7uf)=T&RU^&!&0LPc~a~n%*0S)i@Py^4xnSZ|=;m zSDW@(*I0$63w;+_(3w7w)G{0NB=h4}>*}YT$(v_RNqDs>oUK(7B@E}O#Jcpgv)_`= z`!-N0-O?mAeZsV5I!~`@Qfs#En6j>d=0teXd^u0z&s-^+%MKM(WF(H-lD-vT_P#o* zMU&ypu^NE+J$5ZhZDx#wvvA{^;+3m{1wV-#@`k;wRHd#-vlgLKA;_J0OhM{ z49N3g5UyqbA}Xdn)&C`nBL{%~qdk`8`)vV}rwO7mjG!h#MW_QDIT3a=s>I0b>7^R) zelZa}1sHq-1mNWR4ItJ>rNMTmBlvCD_;nR9#I{>(6-J*9*v<6EoU$nfaY=$*`!eb? 
z^@4V1F-k^&MMfY9GJ9kJri1`Zho5Phy+rmZY&|=7vjg@$OADt-)C?Nx=n`O$UK{(@ zH?*=Di0Xe?fIu$hwE)=mD0 z+iTM(Px#7t6d!Kswf?|a1Rwac`m7wOo?Y0UVIo?!$wf(ohs!soqW^YD~9a;#zS!FD~ z7ieW_*}x|?uhg40Qvu+VYk8sv$DDqYGO~sQ^BK1TLld2~PJo5f2u6Ly+sCDF#Bx6s z!*L9h37Pwg=?~^v{JBLS@Y^<_KIYy~D5?xf;R+r&#-(6xx5F{4q2nqo7xwI$w3^;+ z_3l{Bq)OSva~2hLO)cPN8m6`6K3_<qhD4RUaM9%#_+q~e1sBURoE$djH za@kLSvDu6bZQyk-5d9DlXY0*f{k>;^m8lONrh8N%>{Pj9Nm)Ia=P_`@#B&sAwu^v6VvEF z1cvJLmX$K<4i)eiogJ`hk4ZjfS#BNPI~XdHeD#XXaJ|j>UbhcQ_E^wa3Qh_# zW&2zTXuS#8{EN;hEpHs|CRgW~^|eR-MXx zWTCz@V&pdGP$>F$th(Bqjc2Wg{qkA0t$Jbx<10r1Ioi8z`v=DinUVeh7XX6!T1meb zwwCI_XZ2~s%$LiK^mwYyZy|j3YmZT8SVjAhp`tmlRg1Qm?F^1jBBdf@$5M1Ep@Y9m zHi1skw3^@Oo73YtAcXd28x_}mJV2#aJlV*Y1YVL2N}BhIWS*mQF|*8;@Uat?J}AqH z9-D!M$(|puO+-O8q`6V*zJzx8u#G}Q%a0by8$yPSBfP;%gsHe9jI{!pCLtqkR!XN?+)T*Jr^QI8 z$2fzvNxFq>vJe4?U5gyn@f~;}#Lg^@E5uA(VoVHT6N{XI4H)$WZpm}IFZf5(v`+*F z^5&^u`8cRd?yS?{mK@NLWAW9^AOVok2WzaVRV6;9eF2x_mU@Vt|4fMJXDR78<0mLv z`W(Wiedt5%-u#`jt^@ID!MsLS5D@W=r#)3nFO^uD{8QAbOg`O;CT%^}yCava9aduv9Tv8r4TkFxz*_`Ap z$KN~L6_v5#86L@e@Re|y>Da^w*LN#Sy;4cQM@{Q3ma@urNy#snETnO6{bO-8GqacG z#Z2S06nglCY~%A$19XW=@{=4fGY+1jx{Wq!E{uKV-o@EDwQ+L`^RpqzviAFGCT`u$ z;w+cNxiC%F6*g8H+YGBt3o*>hzO=+NLkx6;;yT3|#}n+~PH+~h$dvvFiCZndx(a$7 zoE+EB3Ggq9ZSvrnIncx@{k&zcN)J61-I204I)*1;az{HffXI$3WO^lv+ZNa16To*M zLiD8Mo*r&N={!4hF1y}-?O>Da{@cd9`-*j^V!n%!Z-Uu~l>dLdaqr$=?4%BYu{=7%ifu)VZw z`6jc^-THWEg7!MZjVT;P(?%yt>NHQkC^;5(V&GOq$MNa}6Hl`s_OO;IDe@Ns=6M_0 zJSr3Z$ezSt8m}wg2c*LDoPFDb%0g72u_=gqsRn&sg@Smz+4?wxKj{d*}%y+2@vuWWN-=8s4 zT9y@u2=A+XT4uvqCNHow=0>+nCmCNLd^(n0P#O8YgD$o&OM8$m1GdMV+9hZi)ic(` z*P`_`SUYl|zdiovg%%27m4RO|RB8=f%J#B;kzQXo4$8OL?^F2{hWf~9qE(2W#C!pV zy>@`Ft+4NmrcE`sK<;Wx@|v#B%NfoS-(&F9&mH@Iv)pAQB*48!Ae(CDQ>)mr(Ex1U zZ^9D;zu~PD*HcW!?}epM>wY0nnan~%u6M&UPUkv;su{nXn|eBY{a6sH*RAmtSJHq# z)2}Ep#_SBU!4INk5?r-h5zVF$_S8g9^6pbfl~d~t9PkrDy8jT$qzTC_X`+seeHOVC zw_HrqQ=G@Ig$!p!`_g`Ot=nSw(L>f?i4Y zgxR=#ah=1JG5)X?CVy@_ZT@ll(3F-)(@75*f1$@%ZjE4{aJW$@zSb zP&PQjE>5mr(N9-EAam=s(HU3y-6$cAN+~sjEfUfxDBXyJjHE?L4lqcoAPou($RO}t8v#A% 
zdEe*v{{A}$VbAQn?^yR**Y#QGSUmgn^gx+;w1Mi=JXWz2{4x1dfm<6`ua(bOUaTF& z#^o*#CiXZn9s6eLRT3^_*HvKPR}*RM$?GbCl|DFk)CpAM*=T3>cS3QU&*&9nMpZvN zEQm5%otnK$tC$O^x2XJ|eK}8l1=nj!`6~=BMCZ6;E&NK{4Pm$x$t*`>ImTmhiKq<*$&vnIsCk>D+aD&PoGl(dU( z+ID9?L@#GP0vBKoqd0+w0x90eA!QIX;jV%|1)>*~0^PQz`q#>w*-y1uo_$6gYI=Op zP2s9*p;l8Vq(U->SFkOI=9Qs2V?UQeBoYrh)=YAb&yC`jSrp_nAE+i$^L+`f1Y<-2uWuvg< z%Uda{!xF2bC8x4>SFEnM>s(4cm1-BZ77*~EVg6MDjd?Hq~h=% zhw)&a)iioc&qA9L0#f5`drxy4^ImysYR@z%_T$}wF2%+W-$uliBsY|%JbFvJE9mW` zHdQKqlXTzbj_moOnNo}Trn~J!!f*4eW&ig{kn7#S?4htHw;iPzL7Y>#Vuc0%p7{`t z!0~`TbsI|RfgBYKPcUe&olt3r2;CcFwG)sBx}YvUFZn<&?~xS8&Sc3W=tW2(a3+s* z7hg+u{*==RZi$x-V2czci$@p)ep9y}X&Ry2QTkn_}L1z6x;q1NMs-pQf*EaN{n#N7EGUS2Q~O zkR3tzOdpjQ>wHl=(W?j+K&)_cq`CPO@>OiY0s$TofIbQIbBc|j!dC>vyEp@}QLLrLZ4RopkuM~5| zIG&ymHdF7DI%;QzfD{&En->efKzddEF2)sXb9C&Urym}u0&dd$(SaB%FNX}^@PuRq z4bMSB>*^J;M)fddY{f0ekrZ&w$@*NWy~}F6H-zxIflEcP>Sk`~%%fi5XoYXrX}d82 z#<}E^lF|)FC9tLGSV$A)A!o~?eyn@yM-b#k_BqDhP4WsoJt7{Q@7|c9P?pL{MS0}e zLYumBq3+A8pTJpo1(GxwJm@T`b2}V0>Mkzd+Vo(mQ9g1w^P@Off9P%LO+Ijd6dQIT zhe_Piu#IdUCK6uX-E>)9WD@+&DBRuG?4RzjN~=C(L%By0p?I7+^xUpw>Vk2r1mtR5 zU?_e1WD#$No=;azyvLz)-uMycgG@*P9Jx6QW*>6}^#_z0*NUo+&|Z3fjV@%%>(F7y zg%SLwWsRS@PbyarLQ>$np_aoN$gYNMBV*3J83U%LX=Dlzz{^$8{kA7k_)Vbi&0nFp z-pc;@iQ-YTgCk+aRf)9_aV)^;GcpC<#%WjeF4uO~&u{LAN9OO~QEz@cpNkE0vB(tU z}=c96N5Ni`50KH_JNj_~jAp<-4K60Ida zDLKFYZUOpq>tceePX~4^iLvNiNrLuJuUUxVFS7;?|Hk8!BB|#+u4*N?o=BJClK(h7 zlQAoC#zg;kla>FT2oJH^wtJ8T)WB5}0uqQlxoAb{<)=f`rO}%A0saL7}SX zBf&AgJBewZE(#kOQ_V&h5r3*B_d62|;|9Pcd|z0CUWF%4Ut%|@<}(yQqNVgSyhIV^ zm>1i~@+dr~;{5!I+l2>+ZP){|qejjRB|s*a=yRXuuUPf~F2T^Hlz>~>76yK*hPGF5;&Mrr1MG7IZq{it|DI8TpS(o|=r_ zT^Za{y;9$Pa9RKnrdVA`=>5W>WlpxmdhDr8e{6Y{HqTuOJ-)HP8S`jrkA!^QWUhBE zQDYuRaw(3CN@7VtJOD{W$DdS+28VAaRJJU-;-&N)w66Y?PGn~Z=Cf|A*}(EdJSUor32qt3g&tRIefWA3)fo7oFTVdZ~{Ja{20z{JQezkVXtk;PoHabXX?>+oE0^3CbKr#mj}eLCB7 z;OItN8H(tdh7y&#*P>5EZu^BR;izo7U=p>=As4^G_`Ld)7hTcO_=wlr=$wK^>vjCa zQ!%vuoX^RU-kDvRk1rqSI5Yx9mN`vFf8ip?1PWUblS}$z9z#6xIz)Q)=WZOnoDUa2 
zV<|lQkvC5I84JZ;zB*&r@(Vbclepim0$ev@_{RaWaD&yE$KqFux~#c|{oE_ct-AsA zq~N0tq6RK6C&Ls&1Wpka{8P+}rkM{ie{zexT6g&MLiImM5;!gwW=-Voe+WM4uUzg9 zDtG!B!vJ;@LyB$;gB)KJ#p7QS6Od-l4ax$0xDrndR5*>Ulo5O>*kL-@S+ueEmybZ8 z?gt@Js>#a$Ws|;XoBnc7MK5gw$UlcSSSDZ>E(lYiSt_x= z2M7BdZ*dE2hi>he{(AfF_das}QYOxx{Y2dkr-4k90&9k+UA4n6clKnlCjWB_rd4#)3W^4ltVdh{@VSujKlPgV$5I z-#`V$lc>pN`Br&A3Fm34A5?9^>G_PcuwnYExEHj3l53w4Inf3qh2M&VwgViOjynM%_uEDz0|uDmwVopital-IFC)`&slkt%S=n0;udVAYi4Col)e6l2v~eS>eoG zK3=~=+(l3VuZg0L6}TuE6F}wt;Pfd8Wo_P+gm;(9^C4g;*ZU4uU`;u;RG8s8t)w;@^c@+);i9aVI@R5LPMO!> zwzt@=VZtt@_b(ptugSH!`$p^_b<(#N>2=x`>d5xF(Y z!v}`ONG>aj!)yn)R9MH8mb92ywl*qkrQCP0MY(b#7l27H@IHbZ75ONgnyNCBN2l1_ zhGesp7bqV)Eh!BC(G+gK0Eb8(>aRvyC)X8oJ~G>H_42gt=2X0*(VBTI*Yp01CWE@4 zuoPrD4mXxiOZHKd7c=IDG7tHoPqQ1dw;VxLE?SWQ?7f#@MoX-5zHpEWDtSLoI$RR; z>&MU+W8u7hlw^%yJw1|{_EWq+IpDl3YZr2WJsfxT-O%}`c&u~_Qt{kU$Rp$Jn&7L= z;&ODy6mW9*QIE5`Vu`D?$IYPP8a6ZD6*c8oCwIg^i~Z*WtPL**O%WUfE@_vA=KVc% zoHR}oFRG)ZK4lghuSEMB^yC^>kO|3bO3_18ya~%y2H4Kb)I+bV#-??OiCGuOe1a(1 zA+G_?$`{JfH0N|SrBlyg57CJ3v5u@)%ov<0t@}YsJS}uh^W*ppvvj7X%wttHE)>|? 
zCpAcraCjCG{FJ(KbX%p3Qz9(U{csNFmAGW4;0UwbLP1?&$1mI~x-At|M`;VwA{D?9 zwMZ?46}1wdSFdsGns)yrvAWoVQ8a{_(3a$dn4i8oPp_dQ)n=f))r+IQqxP$EF1nSRe&_qMUH?8LzKq}Q@T%r&1Ga$__()dCAzZQ4?R0{TunY;J_#Kj*An7awBf z%@r+S=KR)@CD3Kb)USwSn5GMED2^O_JkZh6Chf=CIwHRclal4&ORVNV4~ThUombee z_CUn!p*aroG&{&1WDDQe(gX;XL(wAURmGSy#tfcg7Zh*Ru7B06%>Mo;0=j!=hzlri$-xug9F;& z_JRTLtE^+d^1<4> z)8_F<7R{cZ7^45R3>!2aF7QHWVT@`vJ=bVSRDLAnhKNX6%WhhOofn$)a6-h-;3ZGUm`%KUjb5IwetE^Ga#lM4liTx zu*<93T0SlW>-8?HuN8|>Y9qGGpd@aw0B#_ez}i3lmi_HQBRaJCT0#RH61AU6q0gr0 zy)l&}Yzb7%YCK;#O}qi*F5OV2a0AL;lpz@&x%Ps7Ya_t?01M^dPGxP#(>_upvD5hQcy)@v@l#tM*Yg$Y)lHGwK4iKcXHF|qcW-+<4SZC#a`>JH6SPk^t|3Y;mkAwwjc)nb!Q)RTE<^W6He(i>q$5X?SywP&MK9gVig(D)VePXLi?<$UQ=X`w)75?EXR0 z+^DH~ioy#xqjf=TbzH9lGBgti%?MDRT>!}$G|$mk#9^Rq>s&q=8^Tt?zlU{`Vj9k1u!)Tr9(ch&SA5@KZrL_;gR>fZ!X%%J)ql)sj z-E4}V_?QgK)5P;NC+A3Sbi0GuJX^9bY$)eNI--p6vGJYuo-eYw!!)1va|cJ9)HMOw zTrU5$zMiPyE#EZ)2u{F77cveW_w&f*yQ|~%y=vQ1pa0`FrFPvwd3;14Qm>DLmJp3K z+z2RqI{(q}XB4OAhHz5fTP)9E11Dk44=Hjj5mkBxsnhV&dAySWOrC6VXB=K}Ivt`d@n}@!IbK)K4p6=>A zQ53I2l|uTuk$_v~1~4(sqMB^f7!3f-uP_8>>Cu#BtHD;l{(TZsN*@|6n|;KUb6txo z&4?*|l$0W!YaLpMAgJn;4@FLz&Ht2M0(H&(xLIYNTj)BZByS6;lScMipxeA&w@~xc zrLZgKa#g`Yt!o-|gCUXB+dYvIzEqG1~jIjn7Xsb%wc7m7S5qN9RoK>AtdFEs^okx|;$3Bwb&C0dftz zYj3$HYyaorX!dBf`uY8HVv~S#=tuE;BKu=Y>%hv~{X{GcgNOy>SDRph@g-e%)PXAYJLi#p`N#ylE7) zGa{~pBg5@%#Jf%oG0`WoRW=V7k<&3k-qM-#(9Bx_ae*zo$#h=YXy4D&1N=9> zTy#hDMNH3;N-$}D(guBA`IJnR87Gr#nN)`cy?<-UfIwKT8(XTZV7$S#Vn&VS?4$%O zIyO`huAR$p5mY+zi&y;_wabI9N|F2VUj3NY@3q<+1=1T{d_KK-X65@WdqtaFLnnah zbq$>)e|ik4^;y&dshDoa(B2F7A%R%_{AP&P!d-1yg^PVkXn-Q2VatfPqzs>1>M=J9))&&`mg6a5Vq4~>kzIP0QU=HGLA@xBYfJ_+(ID z?tijR<|b3^__q-4_xR*#5O`}B$z=6|;0LW12nmC*=~R^}M}(}tD85tdWrv>4C)6Q> zeRcN3AEOS)3BA{QK=SJ!5D*JKYbm09?lUF+2Rb@er*-_ty~VUl`ufN~2l96Yu*q+8wNHdB!FX*0v{x#iNSvsmu7a~mqUf423=o!h> zeMCZ#+98 z3v3@V;VK=Z;xl~4c(4vdeKKxZb(l(oYfvb=#j4kLjLLb(3<)NFD4TYr1CD#C?!eYa zRc|8)2mhMz(%A7*DM$HZUqOdDtY?t?%U<$Hgpr$-Wn!oGeOP@UJsxh=5fv;w?Wi^Q zQyDgK1*ZNXon4!_Q=j1dGB^91$Bj%Ws%@AD85dNj%zr%cjv*WWJw>;3LLjSh<_}Xw 
zlY5}8a{OMfy^56s)#A@;uFf0?y2Kr{J%eFhen8Z~rCMfiCg$QcPRvYtEvfUA=l&6Y zt)lrZn_-pnQ@Vw#<(^C5Y%{rEJLW3h2v4XcaxgYto1_Wa&-_z{iD41~ZY=2Eosckw zBklO(t?cK1K>Sz;l^UK$iI1ttQI6l1+zim3;&=IT{e;;-;$KiY#Z>qm4wdKuv?<>i zu{ypII+92NDE4bJ9ikD*D%arN*&ol`a$mtVR##6jHLE{#K2m6=hG`PCfLR*B^mviW zmELmWj=DeK6cT2U3kd&3kHfsA7>hDu$Ie9dI%Iu%b1*>^mM+G83exM&YzG`+#MxMN z;Us^_&@5-usZ;BZ3={_&C#ZaC#`Pd(yBSl$QMu@znK9O|Zc>}Gkzi)%>9IgtO*(!% zU|)YLE|ZIIaH=HVrcvVp2)Myx8d}dPL+|pGk0^7T&3upa%zF9x?fqd7srW-N=L&Kn00H8dGRt zSYe4{XcEdmFYbz+(9W><(h`zBuDj=SLZwjLfrtBReItSt_c-=R>~vP*+UEg|Hz5fU zLIBNb>qJQNaIiGC@)Y1wTXKddE}!EAe_)@VK+pBJGj`~GNZPqB!ZAvf0SCBZF~8>R zMPsJ80C(1z5=nC;b3BwD2zHF-oMADyxxBpFKkB&ZHpwshOnsarCWy3#r+dXjbC+Vz zuqIh;f2ZS}g6RI=v+P^FruS(4mWuxR=7C`h!r)>wu;>}|Q?8;IO%RRp!bU%ESYlF( z*58a+$CVpdu0hBrB<^wv5~Jyn3! zgCfOjpolfY9Wtb$J?zGyvP;3|QYQpm|Gp#)Xv_-PNl;zS#}#!Rum|8bl1z`H7mHNX zMFe`$#%!JvY*Vx12K}^Zc zbki0wa|#xP^1B=iJ}34kM6X3+?>PpXZ_8!>a15y%Cp4(QJpw@-6he`MSJx6M+3EBiNqvckR0w=nDgO%5Ce&X`bjgfio#*P_m z6q){bm>CrA6QA%$-YYI?T`nG&nzGUx_v+ov97&B$C3=;w9`IrI?$VHvw4@p_& z-*fWLnm>btJ52}l8rjnr@im>Oq%?Rnyi}}zHeb6rkYaZ(Zj7^@_4*X(o?jwGA8pO{ zy-qzIEJ|`oE%!ADNt#bzx;@V7mZIC1JInIK$O=**iSM4zc9pt1vQ@sbS*@Ez7@D%< z^MPYIHXU#>G84Yv)0&in7fQskYm1%dfsim?YE_-*-q=?VNvTZ|bS>iK2;{}TqDbX+ zN`i~E@ou|i#NvDEl${Ae)8VIG}u)TPfEulnnx zj>po!&n9crjNx347xoPguu8#YJcSxjriPZur5PEmhtBr7>B~Yw{_{^tC-|pt8XGOX z)ybVzk<>k_&3NmuIcps|*DRJnHc1wHs!b@{b7eMul}`+(F}-}Y8K)4^mrFf?I0jND ztL0J$@<3$J;F?>fkyA8oP!6g}1n%0AYb@b7eiXL5NLWbqQ-llP)NeFzng-zZqI|!{~z=l&DX^t9Z!@GDC!FjTa->{?9_BC_@nE348%@iU$N|Bk(3X+W;;xq(m}(}CazbP%rgvY zmAa;TN*w8!vL(Jnn%2!)N(F{3LC#RoPTL(KTSbecNGKZ%8%M+0vs#JqibHQ`lU?hf zMcQTg#0%-C^R?U&B{gD?isgP!Q<>(*zE8@Nm-ym2qME3ccUJ3l7S-qj(Ms&s3UGR` zRUUU;eMlyqthw6!L$ePuT*8wbIK+7;B;WT<#CgojK|`e7c#cz_P9AmD(ka&VX3k9b zDgAF;?x3LeOqN$n!fc_7hj_etbQ@Y&WnUVF71Ur=J=*pGH0^7mgY$~Dkz}CKz4p}+4~047AZNEKw}QO;EDjWQ)AGd>J|Cl}@y8 zqfW{YPp{k`mW|E8zQUo)jgV0tNnx$YKs-g%3`>@EZWpG@9DGAP_zauPhrXv;>DQGp 
zt4KIilID-*RBSxtWV95v0Log0Dq+2jBv;f+#ETb3&y+sy*A?WEZYkbn6T*e>nlU;C(e(Vt4`B>y+XN#+{7b>el&T{FEi4K#0d)D zhow^EsWxF3Dj|a*-HS6YC)uGFPC9jXD=EoyGwo+oMKp{R$Mp3&+}hQLILf+{OiwTv zW)7vfS$RElJWuR+(I#B{OD}b1xG8?Rfd6yyOu;on1`RgXr_i-P?&*T;v=#bM>yjVo zK5M(QbDL|LjjL|dDn^hR~=@CkUB z<0>oxowM}RFQrBr^w`pFtbvOdb!@Y?fpD%*ILhL#S`6o)tjHLC^gr3 z2s1dJ)J$mWvJ21Eqt zTf1(688Bw3G@Xm6Un`SQ>7*-RAyyK9_*gwqqH*7GAaF*k+WzjmCA6blb?Wl+#5ZS#RcEXQ_>we ze@2!!!?Xt#=~0WAcrU)zNQzXL>c41zi~(U&!2dDiG_$fkQeRHOq#?N(`X^&y9d>20 z1;(B)as^%|w+N#W(k(N$w63^~sJq?*#TUr`v<3~2s z94M-iA$g-z;q)om7qUYL1XKxQs;jH(aF*a&sOidE*v;=4z*T50lo2<^?G|RItNYE( zqN|YN#Mxw7TkU?Oj~25T;t7sPAkIrsebcrI6LWwW>|X9sb*H$v90Q^P;sTxpB-d$H z%!O!ID=(=soaEX$m5NQ6)$?JYCFX(vDJ6lMC~H`);gaI^^HWZFMm}G9&gcxgzwZ^k z=Mg8J%B`508zRC`*AlzS2MrUYb|ZSlIGb?Nvu`vXzwir3cO}fKxjJX@S=|v%j!5jr z*45R;j_1vcbaDn7wTCP0NguK@rrj-1FhOOFM2>cH3<&YtndmOdEUUXZoeHxtiRsSl zI_`x`AQ$=JA|-L0R=p~*b%kp9&RDi1i@GcAUG6I<*}_t_VbY}L=U)$~&|g+3jC!gQ zZkAP~d0DD?YE-usV8h!_ugw>^pCg)ty&O`T6qB@n8sIP*TK$gKVH~qV zI3%og2H%U@RP=4^v|yAM)r++G9z*a{4D5YDiPsYL+WgtN}MJq zpj|4i!4}GNqT5GW?qmJz+B3rXlal`6J-+X|yukvv!;~-}sQ4*7T)xkCK~{YI1E6oB z#3`0rje%6_eRkkb_F(57=Mau8hZBvLi2ELdRsxpyGVI!E)53`Cr5qoeXGUqszL99$ zaLQ@24iMk=DbUmj7^gUxmd34}M&SN1^oVa@3+JVE+Z;418f?&Fd|X+4Z}-uwuia;k zdt<^WkZ@!=_Wm)42X-&sA1ud*Y~*6#IrxmARd|u_>+@>6nY;1XZjT!`66w|--M-yO zp40Of`yWb45aw`2+u)OIl(5dkR-bmHWl;grfP|>_Edn;st4bFEx0^;wKEGM~Tzem( zc^Cno>z@WWu9!sA(~ys?akln}z~xPD%2@4Nzu(XdZ+ii&x}xP;*O#Ym0~KqZ!sWV9 z4gs$fkNzk)-;9Zz+Obg}TziWf=)gBr+evTx5=P%BT!A|IL#8o}g8dB>#5yMAM2 z0j)VW)7fq?6TXXQ_a#O%A4pQv-h9{)BUp^|T)OE-g*-+Uc1+0MACmBha_+5xBK9T& zb9Vn(S%CSlFkvphZ}_MnCCadnH@}Ay{2+s-W+>%Aplx=SskZBM7DrMgQk(|9a}FZW zF9P9~c2{8sV%^h%+Pz%2gYJ!%ONTxkP!_#{@M8=;iR~`nSL=qJTsCwfE(N@e=iVZ5 zfesvUv*2{)0;i3^e6E=j-k4pKPH|mOO%lsh?*bEJ+~I;y*3FTh%#6|B{~!<(#4!BL zfq_@tq+kOh{T!i(Q3p^sW57Ibxt6-aNK!iCW!0PG%tP2l#AO08p(r>>UqHGZAf}&r z2n#!bp~t15w*aM59WYT|u=@c2Ws7(vA(vC9B(dx?-kR)Z2(aglMCrXqZyuzc?2@*B 
z*RG9R0AkoglD8lQ+2!0%4K9fHY}X;j&TB|@SH3Ko>K?s?hB%>6(*!LrsTQ}|h7>Z9l9_X1&mC4&X0H>J{w=wlM?feLL=5T;F zw6`+nY*3C?FwcYf==w5%a7Oc(IS}3OgK{$di)21j`c-_UVLN^oLpR&?yxKX!8nzT(L2!6G zc5UZ+&F(=FS>d1M7+KRFNn;jRiC>M%tkrnvlq=bvew9|tRP**+dvpM%A1FER_flfx zz7o7CaA(5oZ)&={jG9sp=cD%Dc#xcccFB=j z{N!ptX}Rt93Z@ENDUq_V54*Xzk92HWDyAql3d%A_v`h4cFGLQp!@Pc)aO>(wV%! z_`p7vXq7*2edEilA$k(7!;qzEj27GETWFq0crqNkWu!n9SeKmXJ4G&$`B0OuGgvnZ zC6I_*Sz~Fhe?Le|QZ`+hZ&o=2Hw0cBgfDba8OaiCVG7X*RNM5wi3jY5O`iw+B>xrdW-1$hOW=)C_<2lvi|=aO@B9p#ju zgGv+p_rAJG_t=Doil z_SYL&zq?JW$)C6S$QVZF7m0g|_T)pGBWL52AcWcI&9w0&xcDY_C}}%TRVd!9gVNwP zFvln@xBMS&wgg(GMZ_?a>}P*W_WzI6?IQz!dMN#^r1_s)*!Lii?9`^#WaH=mCi6bh zg&Q>ycWXm<82_SqtiAnz4ycU*vG&N=55bg+^+g0`8$X8uXirgqEZ%?p2>$V(k2oK} zGwTaMJOs~;+Q?h%4`*Qg)&6gfnDGR_<4&L0=4-t<27bSSck13+o0DbZt^9oz>Qoqi z?_GP+)}usCkcR#pSp2Uy>l3_>{l`E_@VO6sfOh`JefdxwN?6?ZFl2nKEmePwzw5A` z_{KchY!UF!4hEYG+kdtxaD_*Edivc2V+h%AG4tP_`(IuOEM@;<%p&*oz?aPVpYH=M z_wSLgZ;df~Mxv5$FS!>*(q?9NY~w*(l=hDP*IRKSz)1fdujY@=oC5 z@3ayIsvLngRD|?BgxqJ;22rKo{VK+up#L?H$SGUVdHazcG9IK@ zgBL!UDN~=C{_s4rWrObsn7pyyv^LC+?IV2;QqK1=a9Dot0aK*h08i`2S+3@icj-TF zAH%V8Wa|x$?xyA5H2SD7S%(V$O`5HgPY7C&M$?LF*Q8;%dnj@({@FVec^?~tI`j_N z!cF-vJb0NuQ(=Qs{!RrUPM?`e+yuO0cA+f)FT8emlhO=-;BwhHg9W35E;K%(J zR2n$n7>)%W_8x|ZAVp~J?YPnefd?bG%YTN6e*@b#$L-p~!ek-(<@V1>n{nsbb9_q1 z@?X~}m?9j2<5>P<6z=1%`$-;GJTKF-#wlz%-}WL5an z*Myuh0|E7B1Y1dTYCb&w0e)E@L_r{8&+QDyGLR=r9YMC<>E^GY^S^Bbm{BA`Y!&J#K=PoQUlQ(1(;M+C|8*0<*O?3lQX*Ry%0|9R7Uv;Tp^cY|}Mk~UADo1gL^pUnC1z;^9Qe|`589Pn0{ z_wEc^-|2k5&?J2OKMVxMeTd%d%XRA6Ph0E$y_EictXKQe5D_p7mDZm@Lj9VIxPFG& zyx0HXDva9jp$;_65)_MvF7N%ne9g_Nw(ku5=?NE1+MbOF#anUZ%7=|y0L;j>gVleZ z0nl6m22@XVU>7V(&nW-uRJyV1{c{+t-{9}jUzap^qBURJmKQ-@_E4U*?K3Z@jiS z3foPyDhx7c=Y?d!peJP?B3K(HB07ag?_@LuPMM9nw|EKa^%LM0aGCmS6<4GwgEZdN zh$0jD^CoMI63l#u=X`V+CZHH>36AJ!I;Uny=-vGK6V%acp}}e~q+*(&*S9%1TW=P{ zIn;5?H(%KM2?ScoDxDcCM36P>gQFI4Q|CkDO#B(^j;ttt`<~7pnTD^6CPCli0-aBD zv)=CxHERi!I??yq!w(3^>Lnpr&^{hY2@77Tp8zrEZW3+Cts* zdQJ;Yi>skhlQ0D(kN#ItQB8?z(dL;I{gCuWa#kYCFTj-50o_k6)cO%d=Ngp@dQ}g9 
zxSk`&fc^OlI!_xaMR5|El1NDBvw6_Gbb>I`7R=T{+aeF*dP{ImB+9(8mEWOQ&w z36Lw~1NiP+k96zwlkC4n;2rMofXG1jdD-CBcLDAFC#0l;NRvS!*?_w_J#`Z4sMt7< zbgBkJvKg=v8q!aY$oruWw&DW&oUY=6fcyN!=!Ei8*-RZ*HmGzOhhIrEc($d-uk@{e zzs;eGJ8CBv(v}d~k1l1wbsNQ|pe%+|`)$FndD94L@b1Inp;OM|E0v(eGX-bUXeD~P11wJ~9iV)IUc)g}bmC2Do zX5n@@kUnBLbF0p-HMoU_tXWVoOL0%K>n?JhdNL-0G((Q*V`$%N(eyM9UtVKynA|XT zE-sK1k)umeO5t(8y566UlInrNm(FcXR?LYYjp~A8wjJqmYRu`}iWMBMLv&FI-46c@ z0jLEd&FLo65(Cbn?h!%9wn!7LPg2uKmAJ?2c^Kc20!`I_nA>qUZSOkF*_R-#z1!7W51%q@>-Br!v&tQ{pT zNL6bkMI{Q$)~;-P%<#s08pOJ!4{Eg7qXgW;snyhif zLYB>p_?5Z0q41b`7SDtgA*eT{<++8TDP{H`jD(r&>4?RV?Uok{kY;|1`{8C8)=bL> zCI+1Y^%tHw1bP)Qviu=+I=)Vy6zId!Typis z-pWAiTKZDjG5780t;34ewUg*7PJnTk*1n_V*v{fI4g-cW& zlj>V5?N4itP-l?>|AGuLZVY?|mTq1e5^YO2=9m-J5;L;zPf3hd6VSIbykD04sjjy2 z%lFrof@wihUvjwaw_xAIAh|q2BDesLA4VK;G*s((apY$RQv@H08_wW`5185K>_U_Q&}AP@w&F z@t*^?-{aZ;YB&OkLeg__eKM5jDE78*@~`k`+~NFazy3M{%2j?523ObP;>|=lM)Z)DUyRlAsVxP^d0J1hsp-x|n3IK|XBoI@sGpJd-We z5<^Mng%ZW_FT^@FzCawFb_doKg6yt7I$N8pMs%0Qq$}RY-rT}8(xRrL>Wdrv2XBg_e8SOIq?k11z$98|(0jNwQD2Sl@-fs6s5|3x zNuw_$NhTOU2jYTU`TT{F7T_|tqLi;@_*0C}J-DYZ+KJHcLw(8Y)}r?ZMVOp;(@+T+ zvhz7Ujyn|nFGoZU7mlG;e$6Ud4Bqk|ORg-=v*ttGF(WEh!lZ@Xvn5Ecl1l7Hr0Yg0 z)TswEE4w{*jO;+6zu=e2;`T0f?H#(u7SvQFJeM{L#*|N?9O`c}c~~xKVU$%``Y%Lr zrukT$P3D-NZ2Mi*^0B2fIV8FOqqP&H^j2;kEd>~{GnQr%V3O^cyHprImxQFWo^vJ` z3tQh2k|5%@J7HT{Qd?@ANy+LnxV&%9U(aUcvv2b`ao~Gajufp;%k#USk5+za)d~Jl z0LbPc#h-H88QAcC%1NL2D=y8D!&EU}nOK52V?pyVz1;}^3BO#5V5%YBk>gf^A4&Uu zg0Mz0Ft@EnL_Bz#(wF(RdzH{b3ls2Q`^dwK*o=HwXY@gGC>D|t?`FU2##_WT4Qfjq z;`KfrE~omQJ%QmeKp-C?GC>+sV!9(4x&NDSwvs7^r5*XR>Ic^glMg~LSx_4}+lsV@ z7It#!+@@H?3a<15Pz~NObb1&j?bu5>m&w!Xfww#(@$36-D=dHk>}7R}^)P~Z#BCKH zsrm9f7H8V^Z~dSOio)Iq^o*x&=k6~{Px3A`0Q>vHcBjuTr7|vhN=yxl9P?D>F{=rZX^*RW zr|K<$9HHN{?TZ;GK~SxWEQ-(eN>&HZEfSTUwV2{0mMKP%j-r75?u7XA8UhoCexseB zcl`X30Di$w2$dAS`&r~!O6zcYvI4vA4v3rtyRY1*i`9;r1dn_XKopSFP&TkXkk}5z zS*$@duDS)c_%*c*0iVX34Oz`S=m1^SOAjFiUZ_nqO22r)5i-jpaQ|%PJxl7l(8<^$ zEJ8sXm2plN%u^oi7{PZxonLTsQvFE)XgtA0E|eHSeeQ8~vzXuGhaRj0)iL7FW@0B0 
zf%A>8ABLQ{afC<$oysjU6lr04I@6x?Z2H-K`Y|rMXH=)a^@*(PT=y7sW|n1E+}{fg z$G+Wra<<99++Ag$2h@~bY*rygl<4Or1)tcD8~`vJJKmkRrR4QJlnbg_;-c}O~QrRyTO0O`o+Y0M4ghVz%9 zRd7eY#dY*vzT1B2&95^)xpBep70{yyL<+#4v1lh3mQ??JSthKFBS54KbZCAi%x}(h9eS<^;DkJgzkhbMLmV-{MfdRc zp&ZPW&-XsX=Pu3N_tIqGY4>Rv&LFYpc~L&nO~`XzSjs%%Z=3XF_*aLLU??lJt`KzX z7zKvwRq)p<@k>;*vf(It2^L$e*DiL(ie6A-_C$Q%8z%)!If`9E^D=mO z(=#3Pkk-SZvlZX~c-o!+DZw58X^=_z90jfU>wOXf@O~4sUr4ohg;DKsdl>db@Kq96 z=~9-`RKI~NL;AUa*q9b0`En=H|A4!>uTxQT`_JVU*-~GYLD!%8{LuuGDq%lP9<8T9 zacTA8G%S5Phy>qH^refl!`+|^jUS?+6{f`!9gFvjRam;u`*xp0i&OTA$ zcf?icizY9!_soNC>zn5=`oYXn3DQRV5=d=9i&C|$U z!)dq2De(jEmAb9HqOI4vprb&IkcsIGr`_b|F)-054C zg&rLB9Qy8e-Oe{?j19QXykYW;Jq&-vjdA4PW zyD_LstRl)*C7jf#@YgW$J>%}gF>^qUm!pIZsdt~sY;(S=*Wo@3r`6HYL!-+ha5i_$ ziMldjJT$h#*pxf zbS$muBiP{L6>>?X`L}+U3_nenI$|LQ{Lckb=iCV-vCtbDVV=uan|E{VguV5sZIJql zF*ee#^rrXGZ+_WMq?$ZzN){F;*`_VMlqB7Y_he30{j>@%QNfevvAbQZd1_;3MZK0C z-jjb)gkQ0!o+SA^z7(ydn)fKaKr@atVpggH$DKN!Ih8)*mTa0uylbzmv#>>TA+4Y` z_X-@ZCcqn-r>-p4j*!JFK5-_&rlxmy==|wHyn*@nvNGWgBY|QF52uB=v9fbD%QLnU z1rqIN7zn4@hZ$CX-e25Jnis*P)S3Uz+&~HVSI27@#}F5qqc8vH?rd{{;Qs~adFwv} z(Am&7B2c?x$XxLAM@^fD&BB7IAt!Nt0E0mBvS4fMfRsok;KSDPFH^6lwd68sNN-1x zLCuA|k+nn*eooG1vx##`TJ9tupGt$7w@K6f|^=DjK~#09_nDd$6l^yY$?U#@}dJ znagk%HWF?$#5tWoxdhOQbvez}XHUQ}zsnwxSc5kyb~L-V&q4khZVGPEWrlfX3~4bk zWJSDR+;#dUoegpP_8#FHFkOE=GEfX>%4)f4E6~I`BUAAfBJ@G;Rs?YJZ)c(`y5dd@(Y3| zjpD(uj?sI;jkOnzyUimEV+Ar4k#Ksprj$0b(S|q;d5UP?8cv$vt9QqaA24KSi!j5& zxxW+gLSl}iEor!bkuCWKdx4*h|Ix9SRh~NA&-Jq3_&~aC*s!M3nA@`38E3qCTMelW zy+Z|tV@r~=l#1L7FEUr2s^!I#vPph@+vfVfTJ1SJeW;9K5{&agplqZ1nxI;sGz=URD|nsi6nQ)KrgK$!pg+HWJmtT70Ppc zNsi!+DPr9srlm=6F-+Ce{$$w=R5SA}+x93dt&CM)@;pDe5E5SflcaiLH^n7@tp3iN zvs?Rkc*st=D6=ql?HzxqBJh)R#cyhQ^yL%r@iFXuao&-ax2Lv^7&RTSh`oX{&Tg4; znr>*q(%%{&#}CK8v$z_T-E`h4aTlCt>gR-%mhi(gD6&y(ggD!%<2`!q3^!bFf2^2$ zk-CJZefw}MGTs*s5N}JL%3L9d=3=%=IPZ;?3bfrF%_dYf#WaGApK2M0?3{!s$iW*u zmo5sUQb%5znh4=DNpB$wNQd|E5>V1UwA`#9PMV1QRiaU?RjpSo9j{3=Ed!!mbXRW7DPWL)Z5^kpA#MAeVM<-945RI~y 
z=EfEflmDogif4PaCzf#6qj#pT+cZGvgEmezQ7Vd})RTpEJ%&1Sh<$9J1oE6B<8Kqm zw)s=1j<_%HPmb9#*jweminJdj2K>^~s#<#3MCP}AO+uX{F zO`0WJWw@zD_3+=j$O@a$4uygl|rD|FmxIHTsmod+e z9&ju0COu1GWH_l=KEtf+p#42XoIMsu3n@ixV@cY}87hKVPFpTGw+Sl>ci^5R?0uad z9Mi$3jHLRo4WXr{&v*eoP9)JA@x5yV_}aEEvd2>hIpw4`;`T2LSQ`eU20a+@vs{d`^-sBjxS zW6;xl|MfH9fkjoLpy8m#o1b*Fcx7)+SM24Sa>LzB-T9`YC4}S7{ZvAMCT8iamDXm# zA3CDYcbSsf%KT#7v#dO;4#fWDvK0LBj|E;d%4Zhgd}iqp5wdP2Qv||z zr@mkn4~-ydtWQ^X)mPYsRTgK}G>t4itwq`K)Nzm_F^^xTWH{{_j$EF)K19=3)F1Lg z7@8A=q^T?E2M--di%ai=z%-H?!@p3~9JdXODmT#%zRjJ@xn;c1dMbhUHyF&r{Dke; z5qU=+puX(0g0;{Zqc*Uzk%m3kHQh%8P&eSfOO9eKgXD}^zjJ-$N|Y*a7bKFrKCJ%rhcPZwb6q19d-W7QatKG*_zR#Dn{Pl)~VRX z%O)eok^HkMCRkIlQ`535C4~G84;>*ox30Rk#g(*Q`-=3RqTXo7lrQ-vEt5!d+X+M@ zM~zX*cHd@Aqyj>cfROBJmos;NwXQ|r9Cy?(-i_E;W^#H_xtP|IcakRlG8`UvJO96n`6c+j$p>3n~m+#HAk*9I#M_FImo**k|t#`d3n0=)A>V zZm5X6DBPX3l)dRyDt6Cw zP@@Lks3jUoeP(lC1$K_LZ^1-7OanYj#hR)N`t5I>$|qOL4=ODavl+kV@cs~&Nu@K$P-T!$-9 zf~i&*^!F*LcomSa6`ZMyW${h@2usBom}=?XMzf>F=wB<7An3QAA%PI~Vu224ep*FN z#|{GLrN<|*B`ml_e(?pwh5dHYbgjPIC%;%`8@d-p;zFD+PSOeHBF}f3@Grp7_bJ8Z zw=8p)z@V_FU1qdQxo>(2abGOQ2z@3)dR=UNcabOLO$#?m${EL^KIOK^UdJMYY#Xpy za~W2E*l_zk5HtfJkSgC0QN-?AG+0Ea547YS-w614i!gLItqD z25uRPS-K&EMgACQ)kO_@p!b|~5OH2@8+fJ>j;=Q|EX+J3vgLtD^7lvWcpCTpDczZ7 zIx=^LyJF%G*-`_dQ~KC$>rd}oYnjD_AhKkuW!~sN0OQ}-gg(bWtaO1dZ#}xN`LQB@ znT8?quo>9^R{Y>tq@Kq*0{dalrR0dFnN!ORC0g~V&#zx|Dx9>_pVV_ zGTt78Zyj9txJB}ETBxC@5LkON;WRJ5@G|U$h%RIO_386j}xFu z!A%nrH49DkK>h&maf$X0=T_ zoKdw@w+!NpkMRv@NFa%_$PrOZTY)vbg0HJ}74af|`&MLyEt2&r>>!-(6lYNwx1vOz z%P+5MK2mP#9FPm8*vHZ{XnZ+Z7vCKb6sc<)&)?I9rM@Apl;7fo^XVuS9!=FTis#o* z_vH9bDT})`lM(ilxgSavvUv2xa$a-|B< z;ad7ElG8QkZTl>)*rQqWl~5CEF#l%9SWO{ENWNKTp6u7(< zcrKYuqIMK6xQXozPNS5uA*QMe$>z??Azx;md7#wALSIrMqoF49%uXfhdqFraQ#74E z@lCq--8LAecw4xe9i=uDDtMbv!p-wVXU@vewQ}p|j$HODl@_;x_ZB`lFRWIqL?F(! 
zI36R+NXLH9S+PlN#SRv?_vmreV$?uv`9hn%NXmspVwuKUIUhjH}U*%FQGTK7U@3^G_ZIzDx4D*h_%7w=P$@(@S* zW;qMf)=D)5BSI%br+zvudT{!$i6=XCprjPPl0z8wk<$ZFqY_Q@QkCEQ9+@wKS>sc+m5 zSosu*id%Lm3jGm0ww9#zO_P~}W>nZ~>h@Psn0Juc-pMjY+ww|UgZP4A?d%5g zF(Ry~!>D1ZMHfX%wg3EB8(_m$nC87NQA; z{8w01%<*WaJw2aNkT8L$O_MO2!z_Zn*WC*xmoKj)AABD()3^JyoRnQGcvIg}55OJSUU)`TV4!jr09n zbzUTj<0ofuag3`-UQ+he)u)&KbC9zi*aCxAgA!j@6!)z?y%JS&PwbGKCG$Wz%{&;IBI+5&u& z%XN>KKQqNfLGC$P3w}-bW*#@_PAn{D=?r^ksq{4B#BgV*f@Uu zksa=9RJdWTXT%fiUGna>MqR4iJ??V-p9OsyQ=;j%Pfv1pe>N#hq5^zs5@WOuL)&ig zr9->*=bEfLu^X|}ZkNe8jHpFn@u+Pj$TuFgs(g{h zd6;kMkg!R+`g~aC41>8An}cxXo*2)&ZIaR?TZ6S_0w|bLH`9*oJ4gG2E}l6aZGBO? zRvTZwu1M_?i2posDSgabH}Za4X7X&a%f^{z#wkMk$Aj|7o@*dh)`Mt@fS>{o0u?loxrfO+NfSd+}7~YJ5xk z;E$9G@NQ z|9~>hYHbZ&v~2v1d?bg$rBP4~!B(w9j#-ngYT0p*br_eZ{d|XV`eh5+fL;}|dNcS$ z$pwZ;dpe`_9vU$$)SGB~@u~(t0!q2wfia=O;-4P-Vb`;1dw*M-AYR!Tut)*L5trbh zd)i3nAJXD-{6l1O_e_xG!etLz#eu`XeB3Euj)_+tkOi!P9!PZ*P&7T2P0T)a0ZW$} zo^wxzHefz3r{d3@t4HMy7M4YX`uoX38#@9ECJz>fY%6V>X{_pV zw~wBBE=sE4Po*=&PXL@9d$kULV0pwxIf)zYc_7~DIBEWMEXwvrk)@t~hK7Qb)1FQk zpTkY(6E+tILG)YRcOEA|oZWv9H)jLAwl#H;7Z8)Akw8#{Evx1v6%neu#$&e#uv^c> zsJN|slj4+_&sY0@rWC|AG732|jj8;xh54x%{6?w#zVM;(@ne2`1qqJGcfodnc)p`Di<}IK!@zO*6&rU?FabAycyXg$%E|L0n$xSPLv!~aj@xd;v(H7YXzH6x$s#JajdV!kc+}K za5n(9`aDU9vX=ePt!8MQDzJqpbIDxoH4b#zHZ!s2QM!NY=N%lJd^S8&iPv&ab*V@# zs@`EY@^)|8ZX1WS@W=rNK9k!MwI@JF(wSSlzzA*g2*(Nv0xpLFiDY~mMt|&MrGve2 z2CsLO)-@GkbN=mzyquSn&M1ZyPUgLrGS&e4t?EON@59%yJ%=OCdAeYlLPZ_|>|d&u z!4fbcPc5pnuf(%RHN9PZcEV)tu+G8w3W5H^)CE~S#8KB@*KY_@9enCjJPwOO)I1MY z0r|!Bs-B~`wUgz-^0l@^gp9DQ&%9RXVo26!| z*RL!lfjQz|f4Bptq+%atd;ZizfD+PRpI~$H`V_}!@z{Pn!8kLujg8W>3T6et6b}dn zFe#!bt%M%S_$&tGZ-jk7S$X+m?sdw#e}Z%SUIQNpf@O`7_pH3o{id&l0rRx4u{RY5 z$z2wc{S5;3o)z8@S5mF3di$n|GT7q-J>Sc3+<%;0n)J-J~JrBjVO=zc(o+ z>yVt?w*YD^U<34s3klR64*&LNJ3fghX~B@n0XO&BFA82w4Y{o?U<(7*?i!$iOsB69 z`x&gRe7?U&KalpF&yXkwuT+#(jnS5CkxV73fzh#0aVSd7s=MHG{jhznpk23zn1>UY znx-8JGWiW77UY-kp5MZID-C{4$u|w~vkNsz{^tOdQ~dkY0tt0QV+q&8=qOHm5wVso 
z5-!|W9C7Aq?Clmt36twe+xqB1ysDp;_Wt7c-~ws$XOJ023sklJJZ6Bb*7uab*YPBJ z&6hsq{uB)e|Cxr#0D}o2&SZqr)q(V}k059GB&eI5GK{};tfl2HsA2LsCN^+99=dow zW-Sp7EW*wYa}jr)bc=k&$P=gUo2F`$8bOz!Mm+w8FOIm@SsU5@~vR|68Ki0n6V zeTeDJKt`Hz!K@kpN>M%+a1xX!bReLShe&i8R1U6qVf_uXUovXY#kYq})iL6y zaYidHQy@xn2^7!-E!z&soKavZ(LHS}Fx!-7@=Gytur$dIE9f~+D=_;84PO44F==4a zv+%B9E_+rvJ~iSWm4jvuH}k;!ILE-34u8)gb(X1CZa3c4I^3wnr}KeLmwj%q4fW?` z-ezYfz8biF2N>YxY28rp*WZukmN);%VC|*czEojUWds#w0S6S-@uuQ2T)w2dQ9)m< zl?Ls7!O+n{ly8bzx6NMth1l_PjSnF0AKRMcDMDein@#JTi%l;a`tu2?0ghB2ZA&%# zI``Yq24cSk5PYxDJHc3T7fjqOWEJyWRmxE{@Rajv+-r8gOzB-?$TEmi&ue{Lsox-K z>uXP7p6tPwK#lDffqf|fQ&)|-Q{-&+?`GiyQxx`BBI@ytdE+|29FtFgeg7hrsqWMx z^TU4+NvC#1o;L-{h+%fv6eYzs2GRFWJTQr2h((0@a*OUq=INyZHg<^k*4E;kdX3K) zejNH23s|C8s62-IxA@JpCuE9@iUznQaFkXqhkQL1602@~IrKechd;9>0t7Yk40B}; zM1?`_lb4G?@Ra;8UvWo`0XM5buruQvVhzrm4^SrP1lR#3kXjuD`e>lLFp_!4#{J#% zC=dgPWz;saN+YU7OIC@;G$=QS4<=KM=|f|uCmBh+;%TwVlPC6$*$59$_~Ct^-5bsw z533{7jhxf@+2xQ4oA{nl5FGz`bFlhH`Tak7>RQr9;-}xw5NT2_8f{E>Q=VPGE=mU? zZEiisFjN^*UhB%-i{bD3O|~mdkJcCrJlR103jf@%LJl4=`}IB9p?tTv-?2oPGX9LG z-h3uCBgm(+UEr|?f&L&L`4TK73S8XNIc)f?r!5;mWYN(y061;#Eug0&@XtMK6A> zW&lm@SlS_rS2VrTeR=L1WQe+%ifb-L@N z=A^pA)ND!2{>?*+S%3NdawIkzWUZN?F^gGRiXSOiTFAo{K5LlioIw1wQ)}A^=)neq#eFjh|06kXUXoLq?#`%xwz+&Dv$xMm#ijfhIDSnrLs^9XKa*_v43( z26`~!0V|JOS6+opafHEcYxPx3`=dBDT2z_zEFj)r4k=cy&5-Tnb}DBHPtz~4Q3OoW zamStG??@7*GcZ<`8XkK$LbfKh)+jM+MP;--kRbd)3TN9!yUvD`{=-dG30s}-F7z@( zX_zFV@kYOPfdG`bPr;CS73;-~a>h^dT0;%5!}yO+)S3G*i>o|xjlP#lb&tMxF2Szg zGXlG0rpL#zy^>m5zEb|TcGR+@2`|3hYFaLk@o z{VG=129=4V&N*eN@BDgiB<$F8ts@2OOD}}bPRvTHY@TuwyY3qcuxUhOCI1+hQ67~9 z<|KcmI`;VA#O4Uc78k0#|9*krvk_`W9Lv$Xt`=m)Cno-;O55ctC1emQ%jnIv$jP}N zU3^jn{gd()xkm*SmYB~15m8=8b@qY^`lxFq$VFTm)IvTo3(85t8f%aEq!@Rg!$u4q zon$OHB`13fdL5k^lcYxkIb3_&p~HqJq5ssalN^}6P?%zPxZHr`<>cd zWxs8$XtLm5?hl8`# zz5G!&9&)XWk&2gf<<+&5GLWdJB>o18{cyoPnX(xNAoB{n){xet@GFJ zQ_;k7D@qI2R?Rm4U*FQjf%A}e6Il@lp{hf8gQ1m#j{*yFVGg3mzS|4wK!Z4m=a7;u zID?MtFvNx8x`bHE5CTaWf$xqBr_rCW&wiH{->3*o29nUh7UfNla({)k!ZGpDxiY7K 
zVITm%a+R1CALWifa5EP`!+r!h@(sVNcADbpBV;M4j3-X`uo-qmD_tP?I>I}5l;$X zPG%-z%)>>d>R>SutHWKQ!Y4Z=i)~+3?Z}U3sZUU-D+9TY$wJmN7{zwb;9zY9$rFPs zG!ln#PFTK3Vfq67^gvcyl;obNpYFA;1Ir}L;FU(+uLs*iNF(^HO;UJNL#j%bc9<`W zWLSs86piP?#Y=zA(kR7p_D(&)(G7F|7!5V+GHWxR>dlG<*n{7lwBcXopUmDHzC6w+&EP>5DU*_olXn1@qW7}A3}bn% zK)Pu*(n$w?!W&GYg(o@uw5B!a4@B-8@cl)vt~_utR-u?Jsx@C;gI6B@z_N3QTNz~F zD!#GS)6P0@flru~H&rHg)CbcT=Ft#Po%JuJBa5z~ko!J5xb~ zIJ~+*l(%3D{_r@{b^9y-7 zs3b_EkteidrsU?(vj?Y(y$*`~`}Rg3`#^qg@d)Q(jqN9&H(R^eO5B$x7+y`03ozQd z>o&xH&dsH2HtCEvJ;I+JOKem@CME9M{6f={cTPZqelD7DB5g2FH=;0C`Wm1bn~gEs zP84M$<%8i-6x-Oqxu)G8TUrUStzZx@H^Hbo5#T2#!7!R2Fc=#BVu$VLjJY|hSk7is zCCFwJ9u~s+o}IkdS6vUhB1aW{(ySl)zBhanUG;X1soTPiqfx^$<=10YdAHan=ipEPt2a}xj)%Wfmb!;4RHwlBmzv>j88IUt<>1|+6?hQVEL<%>b zfYA+>&4v%&WXQqOp33z*JD067%v_}z(1Y?syzcUG^+=+}9FHNpaGGxLWl4Z)7}@9Q zH7VGgtG?J*wQ`Sy!sWubM%#=XTET<9q&;sU6?MYCw6D{;3IH=j#pa>5#|Wz{=*h@lW}{EL6Lun%_OPP6i(msVtPgWWZ51#wFAH;;!+zdC zC@WwO;6brhv6azl@hV5W(R$(#l=_Bn*@zhF`r1y&(`r6oDYYwQ|4ng}%G`blv)}(9 zJ|Rq79d6U_nD#~P8;dQgg% zl10S+=uz{WqFyh|gjkCB?BN0$zVw$3JSah~@N~4Awimo1DBH?|skp)m$#IK2D63e* zQD7JkqFy(EpwA&I^Rr`*6m9aVu|@(VS6FMY_A1cySzy|;t#e3Hl|cU$C1i2hXz@jB z)J@v&AFjM!4ax0Wx8A;T9tCu}Cr^l$iP&oD&~`XDx%$+Vfm%m`k|r%YGuuG(t?<(5 z*l02ydADh83Bv<*Y_V1BYw=>-SEhUaECKFM8JE1x>}BNuMM{O*lq6gFMZYNBTfGkR za@62iCBe8G?B{PwU5akrTm0SV3>0y@-2=f>cCUIn!&PnFb&pHF3|}o_yW0lOd!wm2 z;wcg$rBTi#r@xzf{Nqwy2gj}A79UuTc?L-B zwDxnwJ8s%kT}+)-hP-ok>75PgEHpTt-2!-9@YOq4L&19?F!+`eAT`0`-t&vkvg@4M^wUZPau-<+%?d%`9K(yXf_-YWLGMAQeR&`-D zw1>aM1~Jb#nTgd0to|kgx*lp~oyiz-)TnX=;kyLyOAL@9Z0iXmB}ANg2b=IIvTSZ+ zjH0Tw09L-bqFYX#g-MHe32WW*a_RaN*n$fbV^pfvQ)a&aSXF~HJ%!JHcy*I4>_<{j z$^3)K{(hj%yhs1J_>``+JB0gP1$5iFM!lo}W31^%*0kf2Z(c_6AdF}D{cF2coOwYh zlhvK;HBB@LVI+8*anCN)mvx-(=?3waU&dNF+Z*#X`hnnyx4!giKq_iSm)Fwmsrf(S z+6^lPE%e<{+(R1I|6560u^g*S@Zsq?*YyG=e`xwM+R4SS`-MS6W}VI zxa@^Em*xMF+vs&FPBXYIC4kGegp`7VN!h4|n7{YMgfBN2aGE7ebZ$xLAWEAK7i`7G zTV&&nM#H6S9z>A*xUBYf;Ql6mKqzq%CW^2F5c%cgK0&kdBO=cLER)(W{F>Uy0Gch&x1K!1SA<9O3f9P8_)qb6j>DiH15VI^o346w|d 
zOeUIxs;|)2!ZCl}^vh3uVBKmPPTC4Y*m28MBXCDsv2HF?Nw0>k`z>h zWjz#yL`N-{XBxl`sr8h%WuLPUIh9(pwgW<`#gla?)NIfXlh|_%g<3}kTos-yNC3BcXSBml2V*%qsz44^s?Q?SB9|&1D~d( z9Pd;cg~yvaLu>k7wpzG-0qXh!2YS&Pl-Hd7QpqX1rUHXe&eGamMWp)Y{td-zms!9T zpgdNSfC%DW%4tCfn`CF*$jl_53=hB2l{H(a_p-`UcQeEERy}ab@F=Y6MBoki&zjwOrcti4pS zJG^bBF!NwdDf4L|1Ek=6ohrMU<@?UJ>lXcq?&ZwA*YthR)}uIeAS+Bu-u5l1r?)aM zXSjA^!ds&_@>MCMNu^S0!8fq_syAmb{8JA`LFrTE7ew>#&+;M#x3mN+h!)o>T335U z9mF-2lvcAMH5q=Z(JkWY5j$@S(PP+A^In#6PjN?2UOkkH3TL3b=1UzZ6j?V5nB?m7@t-iG9+WhX8 z^=%T{^Jadv$+U5wgG#T|*Qt7RG>0J?p85blTVTlrZmKpc&>0vb23a zo^3JjwdeeKgb>-RDWkH}CaZrC4E3DjPn3drbYsIfGyLDNz#hv?N9^y~3Q}EEhmSn^ zz|dATt6FREDKkHeqpW)wZ_B*3*-WBK1^HYjqK5##kl@8xqTWYrK6_j+c?!6-?uALb z0~*ZKu9?z1Rx#UCIXet-x(eVnBtzV4Uyo4AY}*F+)Ic=9gWYIxQzwNK5cqucSWu!@ zHrM}rSVNr8!w)(C`q2#Z-YKx%TNjumqBC!!G@=X!j3#AQ3Donzw;!|>llcYQa=)N{ zrllngSSUF;KS(r(w% zACG5F;jWk3KS%6Zqn{k0JuUXhdtC|8rC6qtkMaEcDRS^;dQhD5k8>*Dp9^S@jJ$g% z(I6TJB(AU}aCa(BxnzD=d?V$m_@6J%2HYn>qu+&kzwf|2YGhdcx#YT@Yr>B9mxJn8 z!KEW;;^O5k@M_fEdidHn1I)%uj4~#J9vfc7&rvf!j?m7f=b?Z;7B{cVXa3Ef`pOd3 z@qoPx!SG*0c;LXm?-C5^gpSmT05@iujG&Lrcim_|Mi_%b)|u?}KbljBI)?GFz?i7- z0nf@2hn#u}>4`lj^R9WZ2?JYr$#*sgD5w8QNgpRC|H53Sn2OO38+;;yaE7}K>1!LX z*Kj_Vj9#8*EQLS6$4GC){RLrWBrX)Ti0_NyzLhI4eR`_vcI#4~{~Ek&PK5nLC7xQr z?jOfCqAH$XY)r|qG~EQj_#`z21hLpG2#6^r0JG^FEf(Txm?7649=xT87~bQ)a_b?Z zNu(tH#e#X!0RjyR05Rrcg zx@ub8DpuVNF3%EDV4nYJg%14u7XqITxDb%P0?`V<|(Dw1B8pGLB~Sy1n8_9 z6}chny?r@Vo@-5{7M$Wel{294jZs1uu=ibPjm~H3XCmb_c#QMn0LTIb`Cj>#O-DKc zaR!B!ToA9t)+ZymC5ivu#zpQS3=yQ>UWBfz)IH70&dXKmcQhP`^_Ows#IFcw^e{HA{y zMmOI66p2^=srDu2+-*`yt>LFSCF^iH*nwbZ}PV z&`PO9P;35gz6usu>9+MmH0a2+rrbRXf7$f&{0RP#RA@o!)2#od*<>U=q^$H}Q#o3X4HY-Nh@DEi16vS3Fc`AN0Ak5) z9ZLJE;GH#ky@uPh+GtCg2f^@T6>2Kel4byyJDpSqUowagUnLZ&&7A)n!#&)yX^m1F zD*Cy~>&4J$%p`~Y!^UP5Fis?LUclMlG4kOK($VYH0xkg0Epz|W2_jD1ab@|oZIM(& zBnPKv5x*I5d?W zTU9uxuNA<6$N)2mWW_p4w*UDKXpo4N>nas(J;b}hbJIp;OP-{Hxy!~d_R+J#ZI})4 
z@>hW-y+LBMJjl_A@b_F{9)6M33>Aladt7I9R*t}b>iF{4X!H616(e4Ad3DOwBzukDA+x`-b(fQgHRn2!XO8;fp;6jq7b<#VuwIhBo!_4cRl$T&L;Ur@To~_7{V&kqcA;-V59O4aKqj_qSC3oRxM(zq)*55PENPYym~bE0a!A=?edu( z{S8VSmYS?w6Uj<1EE4xyK18fcwKRI<--Wdf_`mtO)u*~<=-{3k_1$ISa=Gp$@0u$v zTFqQ(+BGi!bzMF@9FlT5$IR@z@454##uJ`T_}WreK@CYPkB@avS0HFG48*G5OS}Da z6xp!16*-LvjO`1S|DGOs34V1M6dJU?O_!L?As*}Z7fS6-&o-P*Yju~UT`TGTOYB>; zaJG8~s?QU{`$Q#Rqq{$=xrsdV}_@H^Fi!*rIkGTuy6<^gNW?Y&jcFN#Ok?le@)aG}&akbYZR z+bb0&Tmjs%g#M)9zvUtkheLYv884$peDPJSmHJ=fU||t+iUmyw!q(}`x@M-Mj5KAS zYi#N^Ie3SMPnS;?SGJttRS})yW*}lYkzu$Q%39+VJ+62;pJEv5CX}<3+iNwh5Al#4Wa5lJq&Aahp9LBe> z>;SxOf;agLcsfktuiy7cM-fl)L5VJJ`csyj0;U?0;S{*lf>xG~JUxk0n1rgA?`t5b z+FvqOHuHczPWcW$Tq=R=lS?cr6#qTwrfh2*I1%5`FtP@pTnR%;d(GSK?!I7C62{u2 z9@J56PDw-7nupiGqpM;>2N_hH3|&(3`}mFDrwo6H5!pjtX?`!nVNe5%|H`QaDk!Un z{l@5NTJYUeSdh%_Q!t94$=_3@h>O1)klP!GK9w}ncs84dsQY%Kb$i3)A2C%3iE9^~ zAO7dd2YbL-Ch(;@{JgWOmu;be_N|G_Oh8pq6$9?8NHUnT1@J5~0XSgdXIu%c=N2Ks59@s~GsgQ4yKn+otg8`D0PF~mz#qUb4S--DS=A>LBQr_jY-+>(1ny}uUY)Teh z6**wKdx<%jV1Rv2uElCzg}9O@hy0x7XE@%Dmskidh=EuY?AHaoC9?xChPi8ru=tyv zZKhGr)a3YhYP~s7x!noii@J|e`Zy}JwP~(LV`2#-l}qsVEfVtO!h(xaq=G$W+V4dn zIA6Gb1zXcRzhP3R0%Rg}7_Pi^$JbhX#$jvQA9$CyVzd{iq~qqfDQ-_fR21#tyAl z!J!1qux)zfM+p0W=Hd$heyJJ^y!Ij3ljJi;dB#rHsN#l#wik4+)FMqep9StETes1e<4Wa2`G1uaiPJIr( zqP2Q+L}_EddhkGfAI=L8a?uMPB6lSRI**L_JSMLz?f{~WRjQN%2k{K(-qc6Xg(AcB z!Kzr4mq?oP1+d(=05Ush8XGBBQ?A>E1l%#tAkNtRvSn%=2wjKY)cs#MxR{V>&kfxz zFn?&d0I!b?8UQgBi5k#msfV9>T*AIr%_^f0899^I@w)pXi$ol`YerKs4+X|sM)59z z?KFIDjZq22BWS2)YNiO;o=|)i3{fC|B~T|I*n5S-E&b9oYa+%J7|31@>N}gO0e8FD5pNYT%gcW$l8l zjKt1Ht(Hb1n6C^YVsF9L;b)O?3`CELi;h!+I5E1nq7peBi>T%hw&ZHr7$336*h`sr z_7nppANcLUlJNUOUF{T$-SCQEc+jsZlJ&6uALx!)sQJA<QZTKoIH2<`{EPZ;Pa(x+E`~Y>^&>>Hw(|leMVr>2kcv3 z^yCq)c7Qq+K8WmHEkaZ~2;`AnR3c#2TVbarZ9Pj0UdUH>haq~{ulm%OO-C{?UcZ~G zwLrHKxSIZSzF2eORVN2*KJ1c>{#z`Qpd#3QU)1SgoDrb!m$AEzE{A0gV~;ROUx}GH zapyDW% zWXYIjs4)~vAlsMWCzql0tZer=^HXXUh6Q2r1V?b9OfTAYK<5K|4@P$QxTN!ka67KZ zssY+R)kUzNn^ZGWn0QgF$xP~5b5rmBwEf4#Khx&0^}%3t#TO6UfPrFzcvFKZ&+tE) 
zAp?$wWx*`XCZHBjXRopZq)1m;=U}m`@-BGj@kFU`pq0PrW@nC~{PCP=N51hvFcqB? zY~va?rLLKH6IA^zVDQz7%`jxiAu|DAE8Jd6Qzj!XPH{>;_1*-#JEOL{$nJ^7G_x2& z`7iUgF_AiG*`>qu+^EpDeQ0^~(}0Y1`KcQBFLxt9>h1Vs&$U zm`UVHf)UrA+{*u;)5J(kb0Fc{86MK1TzTAt+lG*Ahe3Kdn_;`-(B3p!cN2rA9`YiB@szJkEgte(XRLO?@hl#lq9 z(qVjC*ax%*`o}gPvs3PE1Ti>AT?zgA)6YH!HfJ%KD^7}j3D0_D@lG=jbmELiwXF}n zO!LZ%j7boY zvn+r z_(+kZ`vEqe_tby?>+aJ`>BqAPzN&yYVB|9ny68AD4-;O^a)`ds{0sbyT1Gq_#QcM^ zl>u6sTG3KLuDCWrxtCil&~%5q*R!e7jgf4y z)p_9%H0HYWrnXPNKw_%IHumlN;*U+)>{H6|eKBIl9tF}*?CljzKQu=( z;@<9GVyIoR{+ph|5Jc-UKS7Uhi8~{6Z+n33#S0Be@G-y_@ZcO-IL;LWkEAlWy&GgW z!wT2FHVKSHkvJL{P}c33Oj38O+QlO*nZC~7iV8;k%5@N~H3 z4j7?)o4!4@EY0Ixe6RcRgt&KEc>}YEm<*zKLu~h3XZ3b>-F%t;-MB~T>#;=Ghfg+m0T8wU!lcH0Ji_1EV$tgDxp!hlC zDc62uolD{B%9SM$U?R)CK*}=bF_3Hscjk2Pxf7UT*3g8)elsb znj>z4RZb4U`El%NT@*wPH?PA5vs5yB=QWg`bpN%hThYuzVAq$6&9V);7s|c3_Nq|? zZXMiflBFaAhwZ%sGCz%oGi(33+@lYGAmv9AEFcV2x;nE+eYPqVh?Lu^I&*sgpz(Ns zWWPuccl4l%!5@VIa;OoAz+`?0!crJLtCZLg_(3#=wlpyUEU+KQf#h9=fI`v534CEZ zQD!C)H^em87=eH8&XR`i^B;k7^8wcfuTC3CwzP5dL1Yk_o^7G#lAH})F-uaQJ5G@CO zj#T@fJA~5{H|L9T&|q^{Oj*GOP>N3GfwYaUqAbV_eLmv|pwzDh-AI>HX3s(dFMuBV zT%Rsd#wXMXp9g%>j)0?RRk@Qy-<|@nfdT!D5aM&D`w-aW0vVYeJv{{4a!86*8CTSL z_|2K$!2ev(5_PQy>?;!h+4MS%z2IDcgSzVfZ=Rf($m#Rlz4f*b{ip(~oQwC5gruNHyob>5t78`ZivnfB2_MwUMz(?ThFu z`&{^p83%?s6olmL&GfLR4bC2gSl9j&S^wys@T=xo<%m<1OS!0vSz(lt=qy|ECp?oU z5USnA1-_Lo8lIMCPBMNClg2$Aaz;!0Gc=(Bv964_0Z4ml0$5vLls;R|AB1^T2{*^gEP0ZWr(sfq7#J<8ZVj0mEB#-4qui*c&BFF zWaj-5@LL!2WQi!Vfdf5fObe{D`<;o$XG*~DFnv~%75m&Wsh+Xf%E%=2+#3tu~ zC}+pLMVTtxnLAM&*2knyqK8);5znJ4K0A!oy7jjTe*XW$g5QU4!~(^=|F$9fIrCzv zsg_{rx^ai1x7>c0zGIqpLl|+Z*Nyk3dAu$N?6bnifdBi9eY> z2VW{nzNoY|jH&p8PB#D<*?E z30lI#W*%#)W#{>Vp<~1-_=rj?G5R&ehS=Kk9M`MR6l;)ii{3=Q8Q7o^;0(#PcvY&a zmyExFE$&Zbav=WBofCNDz2pqZi7F@508ZEmU>Qy5>K@DBX;m80f*}!1)BP}5b4Qv_ z9LRg{f9nP({XZ$sd~+l08-zy~N#}jtXVyj_Ut*Z}ii|Rc-M5vldQJe;(3>u0b>DN+ z_!8e5-LVwAnquQr%@8y?t{kphMR^kLq1B1U@=6*@>$I0Q?dLb~zX zS}qs?bJ#g0>ISr@pWD+>rl-+w9AA8$QQZXz(*b)k)m2Mw>8_kBwJ?c@@{zrlYifMf 
zPGD0vim-UM(SmVuC9jl6v~Kq%Jus=6qED3f*Y~>F`&bMt1HGmtZ-dc;AM(pu9 zRzf*@RMLXJMRA?%vsa!_=n;9uJw!^o8z%$fGS$eZpPO?LKQz@McUGXUw`o*2U=PBPv((a@FubXI97yX`z4D}7^~T#6>Md?D zJYak}baDv{nu%c!Bwpn6>oE0lJ8Ihh2nPVjw<#)zeXkdziV{I!@4zzL)jBdeNDpkcA2?fi1 z(I&F+ttc(#CsjUD;_K{Wm+Pgt(dN_5=4I_{2j~Wt;NUyj}lD_*-Dbu4LQm@G1c3Gf&FcW@B z|E2*}5rPf^5=}OHg0n42IuYw$*vB%3$Y(nVQ76ySL5i--R7ui@MQMhdn*1rI2fPXs zM#A3EgLuH28LpHjL1Wlo<{uEi?-jI*o036LaRZ=$zNl2IQs2}XY8f;Cgo3Hkh)=5Y zaU>f^9iu{Cke>zH>OEW@Pmb2jN&E&d!#4igQ0_3=kc|j!H=#K2 z4w%t@q$0pz9ZuT%^a;ebyYV1QJ~GGEjNB@W@0MpHrK05&;(=(fc_`(BHQ&h^ zf{3C%TjQJ-gj)O8p`~lEKQ^en-v!2EOGi^w0PSHhwzbCt)BE&nT=3fd)sp@vPRV!{G;_)vQ00^NG z;yp+JdEOf1GoMSxjQMye_s#VN0mw9m2q{6*B}(4g6#}@$N~_IYeAihs9xq(#7BFBh1z9`Wc_SGnEeq4cAIEijh#z&mcjV^QJW8^$EJK2&#e1O`5lRQuDScX6W)>n!!c5+s}t70~bK4tPg zY5TkEy3C@F;~~mr-jf*%$#K=@cU{B#M6`{sUbl69lC?fkRWkt>C>#Um4)TE!6)`FU+4pCM@PZsLm_ zqnt<~T`R7c9ZxnEW8->SCQ!xw@qK1O_qbT>bvUzyS2r}D?b|I@_H}THQ2+I}BhF{% zMG-r4>jvTtyW|A1LEV${O{8m02WXo|;+zKGo$gbm+E_QOo0d*{FZ!xcm^EqFSr-Zc z#XU|J?^iZCKCC%qUqt^_f!v1Vt*ApM8nBfK zNmt#Q+eLReswf}w+ZT4OgfYS03}OLlR|qxrM)(DNFyi4)WWwU@feS%oLS`VaVLEx{ zNwamNt8ADr>AKopr_@^AvX?-=w`b-P?SwTKyG&W(Wd2OFJ$HYM;fXSW57=>9Q-(xu zQbcXiVs6VNDVK|tXI*$ZUh}KD%+{u+lFu%L-xk7fLxtlAi)Y`j)&^zt6sCsQ8V&G4|!Ug*Nqo>`@PKXm62J;o@ zgIoJFJD(SDP87yu=Qo`Nj+if3R;Z>u%qbNw3#|)=iKeaUZ0BI0I-TR3?5THy7h~iF z?9wB5aN~Gm_tJy%w}|^Xx5@&n4L#@=4J(``=3ueTNTLs_r52 z*N2@Go36W`E@s#yir>u8e1gHxew&t=cOh*iAz!3vTg=|OVmp=fnr%KZCgSa8JW}?i zlWr(JdtJ$ssn2$|K8N|U7ZD!jKzfQ5~n-;#*!cB+2sVv>>(Yrm+A|aEKBo|$x;YsvF`!LBu1`L zm(kG)gnXwb!wpiTht3vrFY0w)o!Ru{j3{==Ao5TqP?##D~5_R{YXPA;l z)Lq=lgp_uUHo2-k%hmmBK2~ssCu&4>ezNgW5`3EW>`NYx7(AOf=H_$$czFPL2rlxn{3!FHg znIJ)`hc#7#>DkLhENWCu$Np5_40F>Kd_awXV2VqWi{G1UB}djYU6)S+YS_CJ2kpjE zYHIDC5w=msQW(g7XdHc8Vd|LCYq~aD)=q@+i-7|DpLpvZ~GG9`= zAq^kkgV`Noo>Dc5(r+H@G_m%zeLk^ZB3HH8V}{f`g|GFII0b$8l!4C*6XB#I8d^$@ zue6dSF1LR1J)Sb;)0`F3C5Ed>Zpww@JyabQ9Hs83aRxvI}Weeg?c#Q`Z7! 
z+OxKxiCazum-!g|y-Gz335-^wRSu4=-{Nb7NO83RGpu^qGGaJnG8P@1r|!=P$Z(vs z8aOsTqr&nq>b&^G6lWWK7SD4DH!(O8E}kryiyVb%YW(FqSvw6>1>Gn0wbI5jt*ppJ4AVdw>3O0kTe9JS7?5%>RUT&I9k#|+`8qMb% zgKI1nL~HQ|#RF^uHN-i=w@b%KRUhk7%gCek5MMWyS(Gwn5mCE_9qu`-d(r3;Lp*b8-AU?pgwjBKiacqrwyP1z&n(qc z{5b1?+jeAD=UM?D00&}BUM|bS`qSf&EuRuppUm4<-fNo;la3uvbuOC$^==vz=W{NT zbe!~iq-!@uPH+En&j*DgX*+* z^aaeyRxUBSuwq^Q`(2;Frt@-0jY6~Z6Q+DgVsh#6R{u!n!&3kAZ}DJ6iprALHZ`w( zR*xB4dkC0G(dfHwy^tg|nl1=}pcdfn^mjlC zlEL%E+Bz!QGADZcm(>^lcpi5Z1U@aw*QkfprVb3*M?fP zg4?t|Kf&t9qhr8hf9%leUajs4KwG+CTN0+k&&{taAz)qpL8bT~-nLp6RQ61=UoKwe zUHt!8o#jJ*06wXxljTn}K}At=>9w1*^rU}3PL~~ z3%eXFQp3BpU)55HK4dZfv1HgYpeLBH@$l`X?LSSKy41Z~J@QMB@Y`elc$SH_lb(Gf zn-rx>zwWz-IhCUD@8@YFI z3Tuxnc5cs!Hhd&l46H#rIXTp01brlLCXSn>WC6BpvqZiw1Enw|uKu`V4O4~IBuh>`@e5L&c&x!2{VUlkxmrk@8>cK9&k8 zU1Lc`vu+jUtw5isflKC$)PL$Z$5xzy@Y?&|-S(~sqbb9m9_@pJWc#0~$kk}HR5 zbypST;HB|#3hh6)v0BPK`@i3{=`pO2eP#xclOl@+YxVzQkxYw_HS2a!SYgs!S|3wa zip)QM+=bP(`)$RpfX;QzHZLP|R(Yrt+5S%(LWXSk3lg|QjosIzf8F%p1hV0unw2XX z^^48N^CBzg{r)t|l6NxKa1Z|V^rC0^XGg4rxB03|uX8TsZ;OLZBfI)@!B_sa_IjTI zm&hf3nquVx679)Jt8v`V*ZtG=KF9;Nvmvs#@9gHEKll%C@bjBLev1$RwDwJi$H~oq3Bi|^ z=*Q|T{Yz^bc&7*AqUKHPO#J7`_%F+ztQf3-91VK8cz2T>2*7an~PK9hu)Kn7pSxJ*N1v zUd-MUnV`6{ydLLHg74o>yJ6Fx=NavjZ^^ZLD6aqr?!=N<*P28cHqg!BtMybI)5{$%m*k0kq;z>lDXLXXAlmqSzAVG{*}yb z9)ubgS+!#q!X;cDFs%P|1LJo6F<0{;hqa9CmoMzM z&0@;TUmv=={e>mBE0El|{?DfRLnYtDygvGW^#5(+@NPg{^vZ`nd;aCCeC|-mwA;wKDVx5Uu#XO{kBfq+`d_&7W(qcb zZnRvp##@Qc$uRpB^C|cMub!+GQ@L`Ze*A0Y8ZKQv4`pP; z)O?12xtku9)Qa$SD^IlgY(IlBbl_CM-RDxBP}zij4jV{T^~-NSj{t9wRsRAIjBNl; zDW(3yhx{MEJY@I$0kSg5qm4(w$v2?G| z-9EB@*F%q*=#QFzt?Umm#SqV{&|-maSXbLzB9Et9{v@g@?&<%W0RC;kf~kynt|yyO zYk97I_#bPbwF58y`^v0|`miLhvoZ3r-Kfk$R_^@wa6RW;&COH25)@6$5|3jX{%np`a@uu0j^aI%cuY?85d;?-7v?mt)fTns2*#=MXw{Qe3*&XVPl%i3?O-KQ+%HhG3H zq-FbzeTp$<`j1Ps{4o^o!qdKfXLj&6`w!KDe+i+NOD9{g5P|#rIL!aUyZ&kmE>b}y zV#k9|@W8wO=@u`CzN_Kh(od{-dJhu3R71~A`sb6H@=|0XQMzB>;`hh^pNvjH!uvDE z+laf#&HsMQe)Z`qOT4tY*sJuAp&W2t1Od2AKt!}4rMFqY5Do*3OPHtX+*q*QVNk^~ 
zfk|~-&T)VRa)pT5o4@~)n2b0F5dw4)aE6W}!L5h1L~umx85lR&V!am;EOxn0;$TWQ zh00HU+wyw~>X0+8sRXPXm*1Z52zX3zQAa6(YW8qGAZZgpataA=0wrd1B7k|aD>)O| zIxJGG0P2tjBtEuX2s_gS`m9V#Gjj;7#-M+_6vVkiV&BLH@|S`DrZpgp31frMG)+bP zXqR()>78Ld*%Ib%2w47DiUZr_LXXt%0jx11)EEXKoNN%{>_n8-FpQmsC=Yd5A0j42 zT6fyVaxX7puA!km2bkt`azqkX4uE@bvN{GMETaX0nZo(FdFYF5umGf1j{NMHpNjmq2WKDdu>$D^XXz}jfFz$C z&3g8FhBzThkbcjp7VdoO+*;sMK_lE&a@65tZ)j4}jILE~?^9Hmbeeq*V0X0c9obfc z!+kIeRRgHo^;~lX6Ro7Hv9uTaKp2_C zu~qiO1***|sT!f%zR|K1g!f)4A$J(9U<>i=q$Nh-ivXe8R&#vuhIP;}U>@RAA$#K4 z71=QXKw0O~+c2sTvKxkH2n7iDhq#DaB{6+*&njnu!C=DEgZAAx{N;?X)~GFxe-g%J zI{+!&k2%6=I)dO{!x==`EZ87`M2|kW)^=e7pQ*AiqdmWQzBISSR)3NPF%cM`5ts;xF)~If$p@{L$VcFhi6Nv+mS{gh^?aseagp!pQ_rIr~iy8HFmx8|GkIx6dVe ze0~ggD%JBV{Z`|{|284S9VoEBah9WQsCaQc#R?GC`d`!~&Oe=F>!xQ_Ce(w^)sW=y)W%nc zLJ%n%s%l4Wi$X*-td2;rPwpv87WRO-{YcmQ8W_Uxj7nrt(@*L>3<^C!XIwo5T5NI! z=4fX5gX5Ex`MBk?Y9#NzeoZ#D;vm*!s7D|Z?sHeg9Ry@*F1nqNHW#`aoJ`A=@ zn#Iky1Nsn!-+l`={v#O{LM3e6a3oMXy+-81G8RVZbDwJfgxr#DTs8NaE^M#Fjpqo9 zPREnMc>Dd8mo!fuutxa~!}0ZkmCNaF3G=Tm20YH+-6KR85CgXD8E#E6Ad#z=vRcD= zD{YUL+X9O6K;x^OH9AHme`69{M>a4WbA7vUb0=?l`TlI+K3>mo=(ji&_NksC5w&K1 zac`?{i^oHjh;ii1A4NtKC$CscM1I8Q`oMf{AkW19!iNkRB%iJ)r9xjY>!4c5)LA zwL;Tum>oB*7wlzi15tyj>d5TuSmD(z|IeTYe|uw7Dts8B^s9Ha?7h7A#@7>PEUuQ2 zZ)gL8c;$KlYC6`N-END8icj+NrT|zNt38DP;?QKuRjp#a`eS)KK2Sve$1Zscp`y=$ z*-_BkB$~?6YZ-0p1jn z?VAKefHtsQ*0Il=;*mL#8x@1Q8p6$G3_KN$LttY0hwn=HvH}v%#RQFkb7+ZL9|oOA zbZ=xwYMA^3zrqxBYK1RbCrs&tIOVIp1_{M#AgNbT`5N7!dABseT(Y|0Ntas(o7f(^ zclpA~QMWkjw94uL24~?mKM^oUi2j+EEq`FVI)=EGcxC0{Lm6EXJ}?8hO2uo0!Yl+@ zL}R+#k)qiSz`L3qkbJ<85$7m|=ja5RqT5I`nht_n(kl!@51;vX9ra7E1DHH#_tm$D z96;xx1N0)SwCP1&!qgt;7gnM0q9%6^+`-uMh~LFAs*=N0zSNE?62`OohCsqK9oFD| zrYpE*zKRp%7RZ5yfl7Neh_!$={JG!Z`bqwx?I-Z?927mX`TIFT+>OzJSWU6HI9@U&gC;IT+qHt_MXVPsuh zYKLQ74Un{iqnM#3%R(cCJ4?#gqR4sb%d^-?J=aIAup)gJL-Sb_JXD<;m{4}rCfLJo znxNRcBksyu*@g}`G~RlxVDNe4H~dHaVON-)*>7H&)vAIeU_8-&Q&M^*XZ8y! 
zuIt^79aE2)GFA&VoZMKO|9y21qDGZ~k8$rPu(~`gD5O|YygZ6Vol5rxTh~Y4=1DD~ z(BIkwv#=KcED^%NZUmnwM?Op}s39ef6weYckbVJ_UWTK6V5D2i}IDl>9;%&210&M?K88`S_+CA;m$Rhg<8D07h9HDBQClV9`XT8RFZllOEnF zg_FUEx)$bAuXO@3%P`Pnc)iVUqr$x+xF@nbyZ!<>NXd#Ud5-||MtU8~3qbB^Tk0g# zafE*i2qxO$xC~V0^7P{p5j z+8$!|_rV6Iy+^L#2W-l$3%=c%YP;kgCp@4R6(t8zEwxc|QPuIMwC58XEM|dTH*i$Z zsSpr3+9F2fTStz_B+y)#Cgd)DN(U&Hh-h-t7o>X%-TH9JC}uSLM!?o0T@esk4N=u7 z`-v}i+v0#_+- zHQv)f6K^m*SjFKYMOWE*F6wD752skj>mFEXP)$|K{N_k0D9{GbOLKt8x^@M-U7}-; zYb}=XQsCg1nw}&zCrFVgeM_oVO+k?_KV&nlF@ynO|cYp5{CG{EjNWH-tm z4WbYiGU$(SV$iN#!GtWakk#q`k2PSX23M%&$Y(CdE>4xbUOcHXKzaQ5u3<>hZJ#0hhu^$kBhtpl91~d z4(ml?Sti3Ne7x00AZASknweS3JRLc?4a);`-t5~fF zg(%LEaWG7NwEaYvvL~qGO0prt(}7}Q z!Q*b}3QJN`9HB(C^*nQcfaJqH!%q^21gS!OPeeR;<}#^u6fPAt%l*R3;VzShFb!v{ zi1ovu3c5WjxcAUH&a6cU3jc`JHhYzepw=8o+tDp|S|NyCdUh-gr6S$GZwk8xJi9HX z>fHix_;PUEh2x$WJ8ZK{lE-3{vT5rdU_*I^#xmdPQ9A0O$4@N7DN8K=RmNLyULsdUgqT0zyxQZB=!5(UYN@R05M z=%m(0Nx;4LuczK)1}X)~m9r^+z(2{I1w15`LJxTtko?M$gvdq(^VUdjUdAw+-6=}> z4j`FuTIVY`)Syv2N-ri5-H0@v4MNTHxeMvhOY_rPGUWIKoH zH{|Cs9|Se5P^hR0K28q*m@G=@N<$+!@%}u0kr@VqplsSvZrn(V%5$Mu$7N&300p`+X6PTF3 zm&N^Co=r?9i}{yoXAC+yd3yVGz}bhVjVaYV8I(k7(}@4fp+)ObJ5)q21F2CC@O0=A z%3NVEEjJr#DcW#Vbu+jEyOa@%?LE zL00W44Zqwg5MFa0hQJumR#Q0PhB>HUbed|V1tCT=&`r?g){2@yQSHk)6e334-D4hj zDbmive5e@s^ODA?&RQZ>U&+J^hn6dL92+08{73ChRdyOH+q+r|fP3wwbeBQ3vRn^U zon9QK))KS%@ajHEn#s33Gb^qU5f`s!NqJe}0dJijBS_r-s#*MDrHk4mTUJCv9$^ za(+cCycG1BIvFhz>lLkB^igPd$B}jIm0R+o|Augx=m1nnd_mu5m{@5OLV)~A++ zyjWVIHhITLIlI8@eIsuq>nK>%cDaXhvLEXo8@NpPhrxg)t|RVoA**a)9qiOj+^6g= zr@?j@u_pCi16>{Ab|c{BK_dc*6}ABw&|xgUTg8(Tnq}TxN&EIwdKe5d%Z)4y!YP_$ z^Das#s21ri*cbMh$;3;R9m@$Q%eXfWc780(U&uiAADA^|dpXY8`N|2~ycZlBh)-Mm z9Cl6Ldb}F^NE^_EUEB%LQ-}W6PmtLgqLCK>0@g9T&M>Zenl`#`TmZaDre64B+9l4T%D67`)c(Fz_C%G z)GWq`@OV%+#{x#OuJTgIyrRETA%Vyp$b^5uj%B4atDnWB0s)k7H}V|Mk|X} zhO7#1USqSJE18L?V;Zpdtvo3p$n{6dQkxBdC zKA3!+xx!M&;|o$_?`Tic=P<%G3&$~uB=?v=c6_OHVxcm~B)B46_QQLQhv+r~``bkj zOz>2h;XXJ6=64uIGp!r^=y|l;AbYn)0uOpHfvx_fUmZS(!>OGYdW8}<1qA|E*r}&2 
zUr?QfWukImEElehh~F5L9)x8XLNtvYpYdWbYZ**PTdX9n?X~T-tjS(oa0q{lTeVSZ*JqvzfY8_8~Orh`|hqEQvtdS7(j2LyyuNA}(ES(l1eI15T(ecrw9E(%0JKj32Mt#i5 zP)M9PijGpbZ^@Q4P^* z=Tg1JXv^mK=Hn`tt&#J2x4q>YXV?*fmVZ)qM`iYVhfc{eZ{a}VK2ulE!E;Z-X<(3U zNNiOzTokgn!nj(@UE0d_9r!b=dkWF)4IsyGcu6DD28rp;L3@xyb%gP9ko7>7*EV#l z5QiO2R-%Afz0hs2@nt|D{gLKP>gQk!r$4UP0f)2mlSb%I3?y}4|A-rgd?D*(D>?KP zET9RT9rIMXQeDfKPGt|{TSNGulcP;}%^C*pt8!qX0a*t%!eb8qNDG3^LlDg~fP9GH z;;*byjzZUs4Cgt3wUZN4@%VzvK`5=hOnFKfNeE_Fu!gi{)Jj+&ezquD&?d2nnkMeV zU8m1V_Ui?Ht#FR77L9TuLO36h8{e-m37Pxt2Au^Y(p6Sz3m+2auYX@?R2yc`3-->q zZheoW?`n^YJwVkH>Fu*Z05LD?dee=a?S=4l>Qdnp6w+V9wCPb5>|byyhIpzL*^>Q> z=b@52sL%m|Q}6kQ1tNT@1MIS%NS+8XYwoQ(wIvI(uoS6D8@a5M5F_Q=n5^sjHNco* zoV=DQp7-`)l~)hdj}jzNfgEEC(X0tV=c9^%&R73p*Pap&g2x6_Sw*J_LiEeoFeR!D zG$2;M$%4|knbY^REO48rpn*X5-W<3tQCo<%z?)$ z6Z<^+u=L1SfUfQRFt9Q9eW!1)l+&5D!5Eo_cqM5;vrCrzQVjif*s*q0=@{nM5g4=1D&~=GZ4KDq23h=(ee{s$L-&DUz+lnitx)j zG>5~Y2R=zh74U$eN7JnSH`vN4sJZc$GEN*<6ld8jKO(Aa#O^%URhUl$C0)=Dcg=^R zHU!5y=kbBF+8m3Rvmw;$8*)F{LtN=^`RR#UgPAl9Key_Z_sucPFLuZuDMhggtCXSN zfR!^u6xT=0=5|~%dSi~u(+4W zUoT|kDu%lrP|WS7?;!_)WKNWiSS3@^2s}fr!iQri`<~Cg0#S#=;{)Toe3*+QpXi?T zsA!4XBi_N3t8lH!RN{G&cB9Q%D^>nWUz?%3mfzz^EgL0By39)>lYAkMLEAKozkV)1U(xQU~H0nSx|M0 zB3sO!^y&R;LK41u< z7~Uv0X-U>QYSEq^!*E^fd&g`U$32?-Gewh_wn2CY< zGNkY#KK;oT*HQ)w36OlqM(J`gPe5^7Fsx0eGO^^rmoNDsHzHShhiV=7Bf*w8S$ps0- zTwuaQ5{}*!9PNhLoy#Ed7iN`Ox}Km!c6Zaxix-AfOFE!w(fa5t3marX&m>>JrXl%x z=;H+Li$-l5x_EhePyzhD$AhC9#4f3!;<+0^Kkx_UO73F6b=FQ(_fyK|NlA!dEP{PY zpkAor|9JtDCF1~@)z+~h6KWH^as6r@@i>Nd?o+ku*KqC4&l(RH3yEoBM1yIwKKdLL z*7}^+11p!(|6W1ddD2o@2)C=%=t2-RQV$Tgdd}o3jusJ!i(Q(8QROyhUiINhP`%n( z@QAorCX}lYVRwk+dukI0m@L9vVj?8a^~bxvxg#wb&TdRwFc9KDu?Q~Ipk=ZYmyosS zX@5R%KLzc-f+MhJFyop2(AiMSl%-0L^FS#DCmkvQsa3`(uDFVG`e2?l2?MN`o8$fU zKtE!De)|(=o(;8ed8jt8W)YnNtbQ$(l!`MDY&75UDm3yp2$W>&gh`<>h5(^ONJTW0 zLTPtF=Q^!3^6|K?SmP)31VRQci_hy=+i7T)f?U z(|FBg7NkG_6GRWr%UrJqDlhR9oXW*i8{a~1M8_Uuwha=QOO3a=<4%#sFQYu&cR~-! 
zfi|&dhJ+~LR5R7H_8e=CJVT8v6sLLQ3lm^Wg6oySD&txWgV=<#wNFD&JfMNK-aUP< z6NaGoOUhrCLXFa#TO6_4Q16wqQb-7d>bX#bHrwg5dtK~&d;RtCBi_;v)r0IDo7>#{ zyA=HHV&yg}QvS`^E)|sVL6pZJRrzcib>pz1nL{DTKbXTEw{1x!p(v0p9RSp@f5 zxK;ORGmjLUC{F!rG$ntLbiP$8F;m#HWBtMTdzhn&&an7aWmllo!T4_4b%fu@YSh~# zFhXMU&6=>Ioq%+?ICCPn;N-_$YCe(^<^`FVxj+DQteKf>{>WZI(?=YS;uUESw~0!L zMuebEiwl?^ng4ztpc=p4P2BMYWisnG{i*o)V|`iUrIPT;Zs*CNouVJ`;j9%? z5XBy@7%C`$^u54y8ez$ZiDWUq8GvQ*_l4V-KOK2@>9+1bVMhqiS%7kE1=JXLLijIu z;`Scza(08jys%gMxb3HNjm(?=5^Y{+03j>Yf+vi15pDLlUK(B$D?tz5$YbVSFlVmg zMB!hE{}7dDVzzCi^;6oac%|rREQporF&(;i%c|hpn>baK1J6+uO0a#xc^*Qw3^a)N zRM)m5I^D877W!0#3l(}J@uJY-K-v^mZAIgh8KQlm8KKoXy49~Ky@%2-fw}bHVFQT$ z4`lc*!rrQ+k>x?yN08EkilZ^q&mDbxZ5W=fk0HPm{hI}Ei_Cf&HKKcEQiS)&V$%P8wIVGfFz43&ey-q?xfR^O32*KN-X*>hJGw*#)3@SXLa6ek@#k9uty+Q&`b}J4 zgbuZz?u|JiF=t^!MXxgZRL?=H6L=wITkY*F&9v$S+tvrcwPIoQ_E*=g5Pol`(%A2q zH%YJ+I(eSor75!#@gpaJw5z_p*yW=T-}S~?hyowy;o-x%`bZTmQ_1nQ)&dmygGnid z8pgzGCacWHQ`1VjGY#zD_G^bF8^%Ag8D(23D>b7zI7ZT;s-^=p6EAJPfBnk=YPTV- zNBo@_d?;Kp=b?mj;o}?JVGMqtOw-jLWav!c25Xd-aEHC>|3Za(gt}p$EMD^L~^!QJ@|(THY(dXj3)odAjy+FNa@%ok6xZJkaS z|ER}Cyq(=D7IylRb;$9QvaN${NSj@bO-AhVSmWvH!K*Ru!k9>!odU0pdU#7S=_dQ$ z1iG3=J7tDRM=C;d=A6B4%no|YGm;N6Avupz0I$Smj}(BiUGZ|i_ zGcPv!d(jS?M5V~LY{=4i@veVCOS^;mgIA|I?uMFa2vo4^sJWG|JY26-mnAGtnt;{_ zXJ*iV*uCW5{+ZjJb(K4J%15~`byFCcqM2D+dx)NN-08|5P9pEF+!Vo*D!Bfpu0{mD+C5yaa6UDwPzHz4AgTPh?#`nxh{h^FaRv9jTQ!}Pe35q zM2{Nfi#HRNN}UwFi0RSyOq74)rA!4xYr*AEz_C@n)JZyFQ0s!DTy0I~C0n&RLIdaZ zai|ox#<%eeb6FKbeLRqqqO{|sK)q}8p{r2NMQpc)_>;{d+&s$|eVFvn5ck*_uOY}% zMyL1)C;7Bt%c1#us$X%gTTu=zFWO71!tCSOCb+--TiIS~>DcLz%Xt;~A?JZ}nNVMw zsp}!{BzsYHw;+YzmyL>|Gl!zF`KWLOEwaLG@#8P08{==fUa|l3PLwmDDS+$xSu3D< z{$m!e*?m3kC`f#JwL|V&NqMD@{d+1kv)^_uBsLJGKUl<5V^xO*$2Bs z8wB$ru_Zpmb8bA@m&59~ zaV71@^S9mlZ0b~=O_S15$LwyHG0|KV2QSE1E>q^PkgYUE)uyat_jd3|3uUOhT`!xC zf1Kf|@O?2#;`Zsp+Q$_2k)t^6WGpu(V&GO!TG^g4pA(o5aooF|sc!gZNF3!ne7sG? 
zBUC%ZNf(kO{hn}%PABd}pJAvfbH(>dR?(@(i}klDi#J`y=#qHD5rGf&yQX_iGqT|z z%h<*L*>(NQuKvSgkk&hwy@cOyK^53|Nw@SKP@Cezo1DW9eMAd-)P}i+5BSHq5FBZ2 zQXF|&3m(|V`sPHuJnrA-mfmnEzBb%*Zw(@Dy^qsSFZS$oY-B903Ar+NwpAKIeFO9ID(r3yfI>L1&9vSVBt@MX_^3 zK)!6kv-lApbTHlP`aPnG724UTijGIIxLWA3Tnf4yQG1U-U{g4-;Vy+e6uKF#HyUHS z%>;$3EuIe`vQ^qYRip=~sFd;V!HtfHQ>jFS4e;WXq0h>MyOWKGNL<^vG|wGpHpVk& zp4u$>is`9#sqM^PVW5^MXvJu+K^EdUFm9?i9J|}0trQM#sXCQWqid5umYD~J{#+E5 z{ximQ*(fH`**QwK(DPLB4ToZOG()+rQ!qG~g0t||nT2{A!aRP46sgjY7ci72U1l<{UxJO7qXi01F zXUzfqZHW#8{l$N)VsZLRz(>Cr3vQ)>D2hjs4Gh}JWX$3&1jXPvd#5K4C2gt<^nq%r zNsHV7GI^^eL7Gf_$Pg0`w?Ne3_A^hK-kQ^t_bBhA8N%Za(`f$%3JtCxdkYME9Bl~Z z>i}3vVUXi}uwGwLzj$HBqO=+SxqKsWGK?;1=TF}CXZ(7iHAG^j|G?d*gDa;Yo#L<{ zZP~gX9ii0{)30Mq==p`%zd|a^u-l)jxFhd`n46aN7;3N##7Xi_51Ps6rM+jdN{R`B zV!Z54o>xf$dwLn)v3@ai&I@vZM&)J`rN&05xkMQZLhX1n(Gp#@zIBEwIN`jAcq#ky zf_^)Xbx!6gY3>r@5u^qgKAuN11!`33s#iWkWc|_VocXeMQ$}MB#^=*lN*f+_m~uqw z@@uzo8K40uQRpw~4oQ+K_9-z9P|7O}O6z?Qm_d!bOHKlEamGzA9Gvo0-Z`0`&IANx zW2dagBb&YLj3Om(xrknifC3PyiLq!p()X%y2l$;2jnFoC!_mf`&v6Slc-S6d%nqnx z?+Om89PBQ3>(8^LH>WK_jNzf&E2t5VS5U^33C&F0)c&6n!dPZz1l{Tc=dxE*`9?Zq zK-o~)I?EagIV068GPIf`(Bhkb4Q(x`-zo z>Brx^s4<1yMRqQ502f|sbwfd3YqXH><;Q2}rDAwtoi9h-0Lig;rV;}|fK=`pJ?;!U zd(GEaDnO_5oUc@?$K3L{_zIga#&Z2^2}94not4$jK|CwoJq)#0pu^9Fz%D~hZ$YUJ zCNm#KcA|zt3*fr>!mISX6F5 zk$v)E;d|&wT~yNPwZ*+A;goOEW5`nUP)0Qg8YbblAJ)g8=?(g1i#c6!RQ}nhogC<8ID3Z_TXdPl zh5#g}VB01=&m&Q!NNi5eq}X~|04ZoNhaA@%a)GOuic(Wl?{6&kVQfxo6JD6<`L_4c z=_9z$M=rkF*m!6T!LN}f;U%r{c>tmF2H#l+{1P1Gj5q&*KnOl5c7CIx%HOu|oXU9+ zMa+gi#ar9l(5r_Kbk!_V6!Q*Ynh4uoqFMziNfw?hCVfjmQ17d*ASa<>+JRrbYyIE5*O8*iRH1o{)kh-LD!b!joS>D-6nzGH3=BMVNpb&!1j!ajRN?gL%OhFY=x#N~ zsl>h-B%IGUQmTGm1Mjud{8b^KA#x(tBMQNZq4q}4sG35EDrg4vSm{CjTe5=oMyu%w zk04;h>d;B3-!UQj&1Bbt77z+D#>_yP2Ee1>$0s5t5|>cz=T5kD9{|hp8!Z#s%R)*; zgJ1X3!;-4sCJuaZg33ow=}_*3f&&^VyCNNaIuhcued@wcH}Uo_3cl|=LGr);4dF;> zx#@G|*AGyzKD`iTEC>Y@ zx+I%-9|w>br})g9*T3JH6c_9Dd4w$xqJ%`?I_|Q3ComDD`skTHWKI-ol 
z*|&4~sKimK@?Yo85`Od#-JAzxbiYoVG;Dhgg)}SZ***?J_%;V^g&(A=%Sh^TmVZ#kcl*Ru~U`lot<6=k&A%&;A-F&muTZV7rxi6W4JI= z71H;3fI1Q~aqTPo_q6N07LDa26o0Y^kR;u`{*81*RTmG_^1@IUD&x(83daEeAxu4a#_E)7Ked=`u6GI+P`UMlcHK zCsV(_Joo)uL{$_$FI;;x_7^C5(p&mt-mx#|Y@I&ESafAQx(p*F_tgwb+{y3v`Zqs- z)aY4Lhy!4jH2M3lLN`jj@GSrdIyyDFkZF=hXEj42sfgizVxSRvuS02f zCcybvzRaw8h2XRhqZPnfS`+f#K`W9sL@4@kClc}nKc9y1$TU^H&%Q%EVF?FC>XHS* z)I#Hv<(aj*zys%FS(o2;OhVb`PAhO+zG%FYt>CvgfkS{|+ z)+PmO(T7AA4^(gN0DfxaGqV7{Lw*08;aykX{slnDsQM<_=Y(;aJyY@c9O%$@00d(Y zgDC_|Rpkrn%?%&<3wM^ep$b9DFo@4S&;z8fkIsD95bWmE9v3HcDHa#z7xe>Q6r;h> ztI3SUvJd<0-uJ+H%K|XNdm+aRX%RR)TCwtBp`Gu5^`kNB-h2Q>A1$^OjYq?xSOFY( zib80DcoE~q8OX~$20<{NrA>c2B**zmaQhqyyJ73Hp(7sb`4zmEk_xs9{MrZ1=#o~= zbSk3i0uq&DTiMToy|*3z4aW{-CoI1=gdT}{z%&OY!oPtZ^JqFMfConY2t7QlHw5=< z02E2?p|v=a-1I~&p49LeaKWYAUFbjSRX7jjh-cDUgC#Q?H@YR$q0XEoWQNz|E>oRW zeIF)bYC`#OUAo?tQp{w@A6wd>ox~(g~_077uMWT&%E{-|>&MlAo z&|O;hnM#4vz(1j$a^8<~5ll-4%*7x8>eBEul69fI1zJFO7yn>$0%cQ&O;L97Wfs?rbvql-QG$jbuN7iugdP>Zd{6mgSHd9-++}Wp7ja-4Rxcc?LPXO_21553T2C7 zSUZ;9Sx9Ld59peGeEv|bS-7`IwJ5v{wBwitl|mLAqzbs)dGFttJLVTo5j8h`afESN z;lT93wg{U&@8fM@q$lrUyN0U^jo)*`y9uq4z4@$En$&v&lyEDFA!>+zKW+Sh9xDoPYKpbc2?r5bR8OK_%EOrvWd%oQk`Wk`AsXcEn`hj_%Cy$IYb z!KE>^CZZ}3Vr-B}>mL|Wprjqm&fCAg0v)X8C z?@iBxSRD>w{Dic(8Ja+4z-BWeVDpoLKOF-D8{|0yiMBuMpf1BJN{LP|OtMk8b3f~` zhe_^70Yk(=xfV1=j(cjhcL)afa8GmJZh~4r0!&A_+yLmK=-vR{bBqH@vz(ksp8FwC zztC4}q$*L`JEIHpTQjLIa{=h+%h|8Tq>FDMfK(E`2bU;qYM2f+Bz zCz(RtDR)8~!gp%9gq~2umkVhvQ(jTsN!(}*z5vzl6$Pxr8YFSHBH#s`$?bsf;cLU2 z=}VB4rvjR<140dDK}~57SCCRt3>~5-6!txHMo1&hKvuUH&(zTXRJoY;_NwWI1W4Gd zAv$s(*uoReVeJ9{VLJfDtxfMi(S|B4`M{6u{kaiXpAgf*Fc@s^;A~9_7ka-|Xs&5< za||xu1tQ}9qU$B_ujAa0-M75E%hjQ!%>9><`lbi&k{$@x>b>46?ge9Yd>zpUc1N!N z0H@IQPDmQfV0OoA7??8XC!S0BM+A7bBA0)2J&gb+&`c{LfMgF9#106g25{Q3WCdt0 z&H*~8b=9=)(FLb87Ba5*h<;9m8`cd{+nWbFvIdXKM0L~NL-UhRnk&}0i(v57y4t|L zv0o$)kdT%z)?}6Gnw@eG7)BHY2ZCoKkB zlB}Rnl?{^*RodB>T6Q7NDaY9j0t~U!=9wkRp1fIBEc$@N6%zk|!kesAr7hIW7of}` zxZ40VE0NYAVcT}*)l(J6jyg80A!<}#)vH#3P`4lRWAoWQ&r 
zgGUoF!eCGlz{5;62f~Kr^D5V%E}iop7^=%5#mc4E zK?zKt`NRn%l^Dzr+6p+xqlh^gOhqbWgpjNP#|&nn_R!oa{M9dg{X=U5wPJ3KCMQb>6Tm-BP45Qu9a=xZWi zb8+d~CPBusQKc2?9xiNQc27_j4!n_AtAqpi-K{Q*hx&W>DKK}@%N$|kJXu8k}0MFG`adYi}x_IB@18C_gE`!<2ZK)Rs z*`^0Yig_3(zCgDn6jLqw%Y*#I;kM{s`65+}+}?(#DaLJooV?y zRc5*q*DeGN)92;44QlQx?W$&VTANB9rxt%KE@_^8fALgRhHCcWP*E1RipwRH3jJUw0jV(^pl0(||k*_Sy9YLAmnI-GdQWUbA0 zbirLu^hu=1iH2cQ&U0Gvyn z5AlX<^ilKD`l8R;E>+5&renz0BeKl15V>Ha4&{vfr3A6<|MbU&?ovm3_Ww&O_tO&> zT0*aD`VRiGp#D}5{i}c1A3NxEQ}!|>%|ASGp{J>@_^$ug#raoHT<}6PlKAEioAWnXqyn}(dY|8GU z{KJnd2};a6_u(1)zg&P*RD@T5YZUzJ3I1EO(+GLCBk4oIPWwxT>t80rUv}4@e}_J0 z02rYZCha`>1snihbc&Dgw}Q-Xv*`D){`FhYCm;_fwKKT9AEm7S8lnGkq|g~C^ktKa z|A_1@R)s-(#Xk)!f3vgfZx-vB4|pyv@#sG?ccF2O1Mj^QF8$YheSt;5)PUhPqtYK{ zrEcVS`48!TI9+adk%LUK8E_!SKThi}&!zUWfzkZ9JnA~WDZfVS(16vUEZEgpvt6?3 zxvPRUq3cPc!r(6#20Dv8@b%@fv2_9LrPI%@5H9@n96~SndB)tJb@kepC*K5~>5;d! z`~5Ug^1Dg++gT|l1C`Ii~PeuB{XueVDvWHpG~6tELe^R@f?nE$do|2gyi znADlzKBD8{1Tn(TDNrVi-v9H3zd!oNq>h0wRO@%LZ7+a5Ni&&$lH5`0)7!Sre>Wc8 zV#v+vQ_X`)dD(*K|KPumj)3m1sWbeY3vQo#o1g&f-=+Y!Bl*fmk_*{;3$zqI+oMqJ z)1RaM@#r&Yw8YONL6-v~y!OF^Y)SAO`XX%l|HC0118Ejv(glC_Ug9HM_)(Pn&-Z{c zIH5dN8W(;W8Mh$X{%>RaPY&UiX>&0v#sgQ?>;CZuT`|IAe;dhPsu0DHC%M6Q?xo+S ziQg%r*8gY}=yzzfEH|Fb-+TI?d_uXqyCFrV!;p}cK|xQ9o&RXKuMCcmj*CAtM8?dZ z<33@BVOq$iC~9Ap|bw|^^Wz9t}k6(=l158+OBP+=M|~FNu z?!xlvvxxkw|2oY7^giW`@1(hbzn%98=$fcDJ;#Oqa-h;M+5hE4{x*=jL~uF>_CDv@ zcZT{d2>j!Ef-C&rFDV#%DLag&yDWpsNSp5G{1;K*JzND-J_j(Dao3`RZSmB%5^KtNec)=dX+0 z@Arb=0tfr5@rEZdFZ3@buAhmpOznRgNsle5y4-tPdD+XxM9LYfr1N2a4)K?7{BN_% z<}rLv8!P~CWaeM|Wf;)?!An?n{(l+?544hT+qQB4INlz8BG+sG;iBq>A|XSo@h+xH2bKy#9hYi2+WTnm4@vH79K#Ix-8MxYl5@`v4*!gpi7z1EQ z!62i(Bb1VxWQrr2e-F67@tpuX;)!~|A7%^u1&R*#cSitw2!l)L=SA?J@qh_1~kV)MjMclr8rhaK~_0~hk6LGcupOb1yEhJ`*Q*H6Q}LTL?`;?Rq*m;;C@{Q zeyrn4a29}pQM~=c`R%;id(LyS3-;GR8%?N)Qx3{$SMI1>K7+Wa2(#Uk271$EprcF1 zBdbc`rE)L`u!JoAPJ`sXyt+BRiBp%D=z{b_fP4Ft%xpr|F`F!b-`1-~e!gMQ>glJ- z;p=V>^(H62uUhvU_v>mFf>uX+gO>=fkJm6m_EX1fE`7WVZni4xza^Ri# z0WcYtfGSsD!5#3ZN*J`$O`un%NAuumQ4l 
z!>MUdaO+`s_D)P3RNt79X8|6UyiqBb;SK{7#Okx(TDewl=Z8NvkIc^R1XVbudyWpmoBT%ZGYwx_|k1)7#58zHQ5Xl+&B& zrWqGc+AVgpCB0t~M~@cMH-H?CKy+ltOGDyWQQk>dgyuM8R}I;OQOeF|G{43d^1_5t zY5{%F91EB)m>MJ2pDr2fn)E^%aP3shU@eAK@G447IV&rqJ?N{#V3F^*QPS`@`uxx&l7n< z_e;=ntkFO2ybM)%F5_QTZ{3q%kGULH&<2K-%RtyjiUFDf;-o^eYlcK^ssv1Y3aUH>t~p-?566l{b^Z%J}&{vJJ| z-Dq^5|1aBGnFiQAhWCt(T=OB~LW6i>$I_k-Z034sM$o~goVat64bVQFIAbx;886QT0RrQC|N{18NQ>;s{j2M&_p)LzE}DF z!&Z8+RCluSc=V%x+q^+wy1@G%-uGvQ;Ri+0g?n0;%@g6C09u2b8HPbwKYq;}sEgZN zCF!_eoeygPK5Glg5N$H54Dz53?vNI7>A8`(U!FZ6z#iIz?J5P5g?Y4?T$WOdrS-dU z0g`V(Hp0~wE1)QCJNsOJ5oE&# z5fT?azPi&j1r&=yIqowEza(~1uj0exVn&kHv|o83DPwHq8J8z(v&;o3p}{1yVC=#h?|r|`fC4e!h;FZ&*YbA^IXA3 z$Bw!JU_j*99RL+1A0gpa&NU#WKHKug@*HplCn&xx_wfSLi7p4eS-L4;? zLF8V9@ufK42a@P3Nr!psYkGf=oBrG*ehH(lN`pCi)tw}FaFz=$VD+)>GSX4$qQs^JV+aPvh$CIIDtA~pnh3Ov7BRt%tm5vbMT2sZe{7l^0|_)#Bg0CRm8;GzY? zhxE338;#G5Np=#($Z3X z*-nw=HOcbNz=~F)K&=L_{BDEtebZnisN>!uL*acPU9^*Y5-{zsgZP|)*w6s+^6r5M zWlCPZcUi#zXLE+=kvmSsU|0J9X$B#E-$y`&35v}Iwc;*r zc;O_~%aA-VlH(7m^QJJ-A2H7TO97Zf3wi?>CNH310;G}0_>TW zAZX%3=0WVW%OZ-IUXW*u@faXJV1O@I0pPc)-UH%6%H0rx=$B+g=7BZaFwtwUEBU2wc?&neOyMd{VIG5ZE|IArV%x z!6wT<%Iq7U-#pD~0f4y(ApmVMu8jemt38ObY9MO(Z2-0p10PiLQHxWD@047@WIKf< zy!PKiVBHH~ag2TVq%F@7hT4J1KEVEe(>=L^&Hz&f`CbJ;l%DUv{)>t1349tukP3`3 zfd5lZP=bzCjMJF9*~ z_y7fUK&5&5D{{Y4Dm(iV1b@^5Py&!M&0_cW>~q+8KKZ03=K z(0kSDQV>hEOol$ZAbS$Y$xeitmBlBVvX0>}kOovTJ7B3CNw06H6*v)IJZ(b`3#r-@ zyPteu^z2q&6uvMdmt3zMgx7YEb-1l{P(|r`6@A~_Ij{|esyr=36|f^OT^L@>(GHH4 zX%;p=(JWkF1J+()Duz%gmxzr>4*p@{=o|Akz2OBu-z|9-e++MderIAbbr4rf!_GX^n*qSE zxaV9*7tvUR)gd+SVvL)ToWy2l7-5Wg0K{&)xU4G^|m$GGuu0J==Jw)xd$)gtI8UB=ELd@~Tm{3MUiuk4& ziFsXdqd83pHZhW3%Kbv)eHU|1;=f0C(0y`=x<~TYaO5fq+GoG0y$G7)ti2BmbL{K@ zGk5j0QEYLTUk`jy3Q9vEE$tw~B)hURN}qEGn=iBwjZ;#p#|DK^D$p6lp9Mbf6ellu zw5w;Zs9<>_pU)3sEFv(*{yooGW`ZnxqNwK~N0f5F9T)*jI|T2uko21Nt>yk_53z1v z0r}!ts~i4mKr|iV1dJG^U@)yHPhx<)H8zC@sztbgdOq6-;D?KFcAnP?G5{HZ0Q_YLfUr^?T%j-MY>4sRQL8sYp2aU--g-U% zrt)})2D~mY3&`WhER5dbgsWN;?m&v10AwfJoNNdXXqk*B&Ty#;BU*WDUGxFlDWXUM 
zSgm~_GJ&tLAt^Yo)_*d_w338IqiAfa2XKh>4IQJ)kWHyNRnQAjQ}M&5{&yf!^@? z1L^Pzh$w>TPz$+~Yg`b9-{-;Q1@2j`2B^!_?Y$6RHR7u#oK<_VTBiAsIWVWgPnk&lyqP6 z`iNR@Ne=gYsdC>Gkhe~WiMO!041w^B71s|rZ@`aG_i;oD>WdnsKz>}Sie8UPo0_>C zfv}8@yzXXAbvUuIMz|eNd7i|R2T)rB>TU$F22?nJG;Cl|55t@aF|!82PMV4BH&RWd zkR?1Ss8v)ZR?&!)u;31ejC+Ilc}#Z?mk$rRsUK^n$__v=eTeZDz^+dQ6@&1a4?q~F zlo2#f{B8~`5!R4W2{NeL4wKxO1!m?qZFxfBA`X+BTt(Avfcr5FNJ~I(=rSQ;Z2&^(^7{{QCQZQz-Y*}A?*;N6LFzTD@QHx(r;a) zIcq$v>3&IYJ8`}?zwLp)RB@q9v4;iT(VuZ%pac$D@S?i$W-Xj-xUIk>pym{5!ubXE zR=T4Iu-h%$YpIB3JRO61rswCdPz&tsvH^nQy_-HnzbIUhrO_aVG_+3%>pbj(n9@L z8{c9)l=-jl3$22yhvR6hE9N@zBpJDs=2%a98yIsiAoa3#gi71`>oyijSo;glM7CI%3~=lznCvEh;4un+#2L@rnZPmV;93%F1?Mv zg_=L|dUsgcA1i|p6nYA+C|!AIMO|A(J191E?w0C2`Flk&q2oqI$q4h+!7R+kO2fCx z4_yKpj{TsITB9xeKE5C@gF{RK;fG@9sH&~fy?DByHE;qY&QSwzaM?C&wGgf?%i|{&p4U}=u#2E>bpWb20{9F(cK=8Ezw7W1jAV=yK zNi(VT#3QOW>=jyRo9UmCTd>{D$sL;yq0WBu5zC(=~tlqJCiW;jUqBix;iqeBp}cR_w? zPT#X2InNLc3nu{)%$wyaDJ?Gsh&UjwZM{*i-{lyg@*&PoF(N46r7}!{GIy|VOH@l9 zGH$CE#Sdx|Lgk79Gc<3=7b44ELlpLF1hY zY?&Q!pJ=~Hk|{D7LuAcJx+sSl<{Bq$;TX|0mkA)&L9Q;w3?4oe29D?I)dX#mo?gli zD9hU|{M>+y(@+K>OwM+0l$x-3DJPxa-Mf=dQ)14Keh2FE79;DOZKpor@{)!yR_{Y! 
zw)Zu_$a&AK(ukT66)pa~B0jiNbt{}lr(muF>{N{JMqh~l?Vd&Wb!BdM2Gej;`D6L^ zqv=J303T437q~6)X{d&d!<9vMlI>f`H9?mP7-awrnCGMW{dcxjHQc^Wc}ZFDSCbUk z@s3j0HN4Y_V*Uhwh&K)}Rcq&|9Sv}#)O6+`ZvF`6SQtAc_-u>Ip=yefx4hh2?Zauq zM^;?&-k?S!IvWfb=i)!jwmgViZD7HTL#gF=rA2uh}k2%bzJ2^kzIcXf~x>H!WC`cL@DA%@yFN>|;4(r;#bLB^AU+A_2w2UcZUZ|vS9w>E<8h}5 zwy5MC4gtkc|2ISa&##=^1hc%gZMeirM~UGTXfVXE0T0X%d58EU>MhhQKb+2eE|7k- z;@nb$^9>BS`LO@}o0tBm1px5`-_<>`zBbcWLfd8hN#2;4A{i8Yr9Fmgg(s&8{nAX~}H69knNGx-T&Jc${ zUdz<0L80#6N;jFn<(wVTRffx(=+@b=8Cg50in2|mh=s&{5WV1yfRj4D(778)-WPbF zxTc<90wAD2l3!}SAZlHcUw;4bM6Nvq7HrOC4POo+AVZ-BIWCZj_z|ND=%qEUfHkCQ zV`)T6>jr9T0B2upen^SlqrKW^4qkX|KegUA|9QE1fG5hJO0)|#&(5G&>;ERfzlwuA zn_QmlemRg{^C7vF-;I$K8MBd@ddrStzD%SnqK$pgT;W#txXZOUS%NS(>J;g{56vdx zy3vDPo&p0iHx?C@1+Z*sV7gM|U-9(r;_Lz`fGx8vUcul@fC#4qs&7^l4(w;K_CM`PTuom!QBPDD*^!bYbj>I01`%XULGhcuiSMrHGG z1osJ*1p#EQsoFHJ23c1(US~wOrGZeqm7|>MlX4tlwx!)OM+a$i!oYPN}w&VV(KyKAmO` zyk^f)Ie)?{)xGhwZDPn7IeuoG0am)6u`$V8dX3KI1Js(0q{Y7Ak{Eqs+9Mxzof^^Y zAZGiU@STb@w%{3og_+=qW&w<}H|ZRjJ&IU=^nj4Ba4MK1E>JL&Up}RHn`H~wz+ZLE z*G?o#*re`q`fibM4-XF>P?1mZID$xK3)%&FzF0Hm*KDA4{fb{9h85Ns(?78?PV@Zs z)-cW!k$|0$(n{~{PSNyOcalG`gkE8dlnU_EaG{0CwS8+}`3QILlD9{FIKd3RcewGw zhw*-6;k8#W+*1r^ZWqkl8|+d?>EO6)!By@@esGt(`WH+yj5}lcZxgE$kX7Q7Bg}0c zf&S6FLi0t^38h1nK@hrCPkg5y!G~9m8j;7hN&rY*p8vLmb)M-P@1iq%4M+o2c(LO` zix*H7s|c|`UU8qickSMUxEIlB%4@_uSBHQ)CbFc8eShLE$5%p=A0k~qYcFYuNJyOn zNyibhaz{ugOSxBxNA8=?H0Io@H8gBjCs+*BFnESX`zP>xz6d`CdAb(x@6em~iNXjM zL6aJ$qay*B3u>Len@x~NTCop9m^6z5bK>^E$`TZl42{rScw>rY`Mfo*ULIx~jK>GW!n zfm+NEq{|hVA?HJKD3QHwN0vK0wlE1{ZRD`Zntnm=Tv%fSK*j&Fq&4zg2ZlF@Z896GfS@J_QlJIf$;bCkgIm zfjz-te32!SNm&TapuQLh0b=<2Ksq-1=4~K~DeJjPktrP@ZybE;K*76_1myQFaRX;i z{&-1)-pPE^qhCR{#y08?(IjuqWb%vGW3EqL6&_8UOS&aI#nk^U(8(K=sK6do@v%;k z7DiNF;EGKt3Ja)!8Eowz`HI)3;q93PmTI5Wr{*5K`V8-R0dD=&ipxxK)(|?;C|!Ov z+X3I1VtTLqj?(CC@B|d}qK*%|^T8J*BPDxF21MT9V7{_WNhIn^_ zL5=OnQV3JM^d4$~iyE3)RL5G5xT)MkTve*8XSrX;-&R_$?$MvR3qmoJuhdjV3fReI zxwO)TxoS^mVNA)4nqNQMJ#)WLl+p)y1{VO%HS(Y#jB`+Z-*Hx&2sL^S{O@NByR|b0 
z;cMZ9@>E|z{KXTd97;x|d=BfS?r-+U>8;yJFv&)ax3}~Xx`Vm=NZp;7g!|8PCZ^*j zMn|*E$gJpRyFd-}CC847cj~QVg8jsMQbw6ULJ^s*)K05%bj?lGNkW-zfTA z0zUfhlu&dZ ztuDo{sd!#2$iD`5-sYOnanw%w#7H89zXKe!W$+w8S#N}l!PltD+Ztq3*n*OUDm}#r zCdC`gCOCPK_kf&{x%fH@M8V?NsT(H2p*6_LAEk32ZROAH@-I8lI|zjs2^M;H{?P4> zM?c8uYPfwHaHAYkZ#DZM(TIU}y!7W^;qB-4zJ^QUx$^}vbt3>oi+E1_uzZ0_z8ax2 zG5Btsh}C_ZkgUn9(7;ydyuA5{0nr06E6)JaQjC+cch@=_>Z4ihu5)uv_5oIyEawAbtAc&JE?YBK${FBnHF=9&k%E{oTc=o-NH74__{x7* zE9Nq)KoeM`WC|PZZ-Y!ovhi%V66$-unmc6ytge$Rrlc7TDeI77bvHRVnRgiW4}tW! z2!F^qq>!S0^$<<1YWlr_VGN$NFFFc0eHb9v;e?^HJUI3;ft!uQfRSuNdlG?h;f?aq z6`r|M@XBWtz}MdTU!Is?a>CAjN(XaCs=aDJtcArw1IE5p`?lS_!wM4zDVsSMD4VLY zXVzMI1h`Gq>=3muZ13xIgWhgN|8)`*RrU^fd|`V@CR52wf3*m-W@MZ?LTIBp?Mh|s9+&jB)1y9&dH&RxXdhEv^=nKYNMB6KZVYIl-OpjmqwPA2B3=q z5bVFlz*SELi@t3NwnAD%O_)V72fUo)KA$u$qfrnmrDC}_p$i#pPe`-qg6az127SQH zYA}?Vk+!#(o>(AQ7jFk}?RbJh+7dvKBXKW!@3j-ksC?F2?t zbT|U=onOfFFWH{N4Gri-C9iISa^yrfRk^c@EA71}hoGdb(m_9NZV4c=`+mTKVF-YY z={#XcX_Du}zT(dxzqkH$7Vap2A?b{<5~BtDHno|!1N*ds=h7si445$$b{YAo+D_pf zJ`5PLeHl(LY&n_M;n`d1Vg+$0#Jn30zVte)R`}1u?-csqWxw4{7qw9U@jY$>1ltH$ za8TeP`x|nedUx=3Eq8g}!*|5GQu&g38nagX0JO^OjG*3999@Aga8%v4PRXusj`h7A z+av8+{#FJnVu0DFffgSNx$d?V>loyP;XxM7#v065sP}Gjc>COsUb!yKgVzvd7SHTJ zsktgj(a38O+cMil}>dTr*kiG3AKQf_GaL)T2&Bh+n5}@=gk)7K+o{( zt7WOuFXut`b}73Lo@|=Qr%$FHo{&CS^BC(HN%7nQ6YLy6*Y?zSVR3Ip`V#9*Tox+! 
zkJ9At(U~p_VGi==Htfd@@<=e_G1@faEuXpDB`cZQ4fK<3i$k3%R4x=O+g5#kVrU2| z4Jq#TE+KgVyJxS!9rZwZQ9h)VdaU%;d*M$4bP7fWK>-9vq@k@=NSHsyRoRpF-myJ} zy~fFTV84R(+F2LlQ#XHi2l(c+wrMAu`AQ0X3naxS1P)NwBSdVWuERk3uap~q8ynU% zXpRi1h>QG2i>|#tB&Piq^Cr{5tjrpiS4a?@oVLKJ5am|vAE_fS7-)`SbMPmj`krMj zHDi`TO*M#cdI2JtAoY;D2$Oj;r_`b}!4vP*_M!zpQK{W*sj$>CTOt!8)fYE&=EC~BakbMKyoAAiRo~fzaT3HWYLCPKKn!T3dHFfnST3l>UPl7 zJ#g+%DPIt#sviXAjm>!RM4gVgUY92a{iH7=hk1MnCgF(ddwObD z&}t#9>GKaf<&u z)$Ip5&UzLm=Z9xIrZu^ro=Xg?y2v_n(X^0|t)_j=xWR>PZ0TVeX#+KvghOq@4)h%F z`yo)=njAYi^du=BS-3jg%+r_Y0-?FbP>KjE*bN^&FZMvSwX8K_wGijn?dFx z6_Gr~-a?LJ{#jJ6izw1CLyfyFlg7XSKdJt~?Oo_0A~R5tBYO!%4twXQq7A#H^_x6r(=r7PaBV0<;#{o44@@@8uD;l*)VR(6oj_L z4Caq%@?&L+`720CmGx#TaGANe!`z|640k4^odbKN3@fY%Q}0PnvdmHxlvkemDtEJo zJeb-Wwy3>|C9~Z~#7_91&nyhD~ztT>xwL96D-?prleRHM+ zy*w&=g^w_;FO$E`{lfH2P-)p~K(d^C?zz7%w=7gKe&1U+D0z;*pse>LJ|+wvWOa7u zWqPz@?@BMsV^2CD-_|ZNM+U_ramX>+Ds>3z{N_QT{URg^YzDgfqJW0<@s82ZvNd4O zbmyL(sK;zk;B21%!49~ zX`(F2{oAOX0hx_APe`X7Sk_p(&9^V*;Bum#Y9^+JRfpvN03%k;zfRzDH@VKL!rj(> z9@9En3gd$YLF$}tY6DUbS` zoLM;sX9tDqz%lu)b8R@$(PZ}vITVkUpFS>xfADw^Ad_h2`NmV;F6JHp5sJ)GSG!A& z+1BqFA2|n_isjZt(d)&uIb%JorV|5~$w&RSBPS-eP}B&TbGAdGQ&rVlG$wsHcdX7` zI!D_!vrGQwtnQUI7i;N;;O>C#@a3)15-i)NUG;XtJB8g2#PF|XU*5-`GWM78U7i8P zqbaM-6!m9kDJR|j!53RX=;D44sD)7!s{$mdt zy&EO=GNR~Y3v7x)7}T=&8YJCIQrK3u3C~_RjnqZVRD-%u^Q1xod`WIBH>x&?GxKZ#x)d~S?T(qa@fb&})a=85wN*V~ z{P7zL4JtCP5YAjp#bQi!z&PEldzicAfr=p)8Ra!R=nO20!g4A1CjjKy+E$lfHc<}x zLk8XC5v*G$m}Nk&8rG_0$exuR2vxbK>;|ZvxS0|y|7hYmN(<%((wSEeVd00yYs?dk zyPw~7PfEE6!p^mA0?g)pmUqZ)>bWhaz)?7C{5~<$SH{ZT270}2u+vy!@<{tQSjJtB zHO#!AfOYtQ3o*z(;Cc!9MR@_a zd{ z^uVUOM$M$D_)PC$(-gha4)MoK`BNu<%SG}6eDnSm(Lp?cFOV?oi+p{XRL04|==R%kvdP z8!{DI3Ez>Eabsm5`fCyXb8ufFgf8NZ8z+w#b*&zeL)PrMK0c!%NB3lP-}XpzoQF4N z{)X5(6dEi0VJ@JvYhkJhlzzy;pnAvchon6YF&5{^ns(T{I=avHX-P0-6>Ze{=`g;OrDVKtXl-zlF41dD z33t~CI6H${*9(_EYW~fktZrwvO{0d_Ot(&kI8h_HWb0@`(|KJbf)Lih6We>?Jj#RH zy(my>4w5`RtAPsVylr8>x+W@&;ofXWNj|4pz2>6I&|usW7yn3e8}ou&_QemKki#&#@1Qjh4)_N=_}z=iqFy#_KwNbjDrW1pH*`|SXUmwmxbul%>8 
zmavR>Le5c_aBexRYFG_7wIem0D(k6-*?ZdQbfiVh@jTocP&XI~g|mZmAuc3?8I|$U zZ@Pq{27`B4q}}5p1c4Jx zTMcv4g)L_#*|HR8g4ZqrQV9JYandh{mN}gN`45o_nio|6#64QC@DL5{(5_yo%xmCO z-YP$9$#s6b!;istX?g2$x{#J(KVNkW35*-1SCku)!;Cu4ZZMgIdc;e*4O-kQyEgl+ z@N;E!`HT8Bwa8jk2Qq4P1L2bU10umA7(cGU7!obRY~PaxK6`;Z+>7d{8`HzZxiK5I zg_af@eogz%18nUw7{m2;rHt7q;({*xH6+dslc}UX{`5saa!tqWOQ@t0^t=(Rf-y3T zAAhywyBqczgB6xxKl(Jy1sTi%_MUoxJ*Gl218tl`*?h)`lWRrc6#5L~u5na%oKU6X zE(zzQF_Ak{v}j!>mBh@>pnh2X4Xk(h+uL!4)Xr`YmvxAJr7;Fu_&lh=`TT|iNl7R7 z+R;o&lmmxZjvR8ls4;9r9<#N%-xV?6TKO???~y;-WBwV739`j&#_@}^?`yfNZf|i+ zf&#z>`&NA-dHB5ov&rvKfO0mt+4eDjh{1o9tNk)@z(9z`-naSX0~ZEP2@?Xd10`iy zV6(LIR&de;r0n5#q?NOP+Z!&^eGRU6t92Z}UZ={juGXV6*5u{DiThI$N>O%}xlthG zK0|u=k@z<>R>{_@kfmjbv zXqcl~gDguNr5lR7!-t@*l0jBgf}VY;W(BV~j@B=+l|uN9Xkmog8TmqEd!(@rkbC-b zD8l(B*V;{i!I784mstsOLay)gItnlhQrY!0GMVfKe>6e<<;QOPs4wicq$R}!!NmMT z)+ogyIO`MZTsCYcfF*CCxQip0%{%Z~0d5%a^#(*&ku)eXH-BTXIz(qr?WQI#F;|js zaGJe3NpI36VR8JHH{5xMl4?~E195Dw2Xb=vZh0dO5gdFUEi|7|)V~#FL-y2*?t$){ zMP~s|Jt#Jp+mGIw=`KEET3hPlbz9Bgre6LwS5q$^rs42$h#v(AYR8h|Zv#%DEmH&&=6r%yc1K{9CZ|*VOm4gwS+Mrzs4w#6>NkHA%PU} zmImy!yxlYVG=~q((c4rw!;Vd?q3x9uK^)T*4bu*cMD7R@&Ef5HJW=dT911NKq=YSvjJUk-A1d8Y}&BLSXR_lsvR>lA&E*R6AG zgecW&AHY8BY>7*OkstHt2Jsb(?VCWL3iUb~dUWy0aJiI%;Y_p^5dd4qKDWUc+_@1bD>3;dDkj(q)S<#-ajb6t1)OwEj!oNU2%QHX_lt^jOw_CkM{ZQ z*Q`Y++?uLrjUMslaFLVcpmO`9K6E9>;8wDa`-GQ*4kvBf6T_EJ9iL4e0gVT zeLkF4gtcM{WgDF_>{~kjsA!IcMyaZFZ9;!RfCL?304IuUfg-R zB~9GZlx({gl$HHeX879*;$Iz|VqNqawQvvRrkCiIxZ?bs3Uz$fyVB?=VYB#%8`4H- z>}-Pku^gAxM~kCPX&webr*($cF`8^=@4BX>?2=fV{eEhkYLNTdZ4-iVlZicwvleGR zi1^Q~l#ZR<^L#v#9t_s~uA#ek6Q1bHZ(`!5-k0ETo86V?aNL6T0NPfVF8MUMJAq59 z){9=S5nHkMC$y_OK%_787dpz#W{5M;%$AmymPUZ%LM;T!jNXpGRxDdNt%DA?qR7LC zsIsDp5bC3TRJa0o2%d8$-Y=PL#1PfEV8nDw0qzjvt*$n%>~T=X8Mxfi$;pR%Rk3!W zKmGYV{Hy|VeJO^hF6@Y#f~~rd$E$N%Eu8$gvw#6m0$i{I-WeWn`*AFdm$aQX<{>OI zp+Jr*trEZjjd6SDs8AIgES4!|18<`mI&a&q^C%~wq|No7g06bXmMoY7^WAEI;q=4= zSTKez%B{OJpHD40kcXiocYHrnHX`~`H=cTuZEl1vuv~j}Te6W5^E7wu#=crem>Gcq 
z>N^&3^7Fx@@)6rNB*6vu@B4y?P983%go>fd^}V6Z{#io1PyXhqkt^m3OAu2o+0CrX z3FInLJG|z{k)0(p1H^bbrnpjS|ECBx_Jn8cajR?`4yv`TRGE)IckR$-X57`GB-yAf z<}sIQ##}6hEGd7?UOrG*icKk3O3T|| z71^$X2Rao?PnD#*2AjdAm(6Cp{A3B#sc_3;9V2i@?3Hx0D*vJ|o;<*D8SXV^Hs~uE zOph@^K8t#kb%S(;=Ez{dnxrG-qE~~P)}(EgoIXP`;vGB_+qaB+6@z+=yeE|Q;;p~i zjhf8~;rC*|gnKQ}90Rw_<&j#L9qv3U6yY8tXHBKtEo?Wud+g1Tm6UY=2R3&fubyw% z+y$`gx%M!*xFdbXj(Z^P#}i0HTFWelcj@gPZ@f22#c^fy|2bEGS+9TNx2T~fWz-bs zlK5()hOk^NhPq^Z6KWa$u|lHItO-obS(&O5L9!%&}D z(X21|Lz9oRU)QkY5)U|O-OHk35z~EA+a$;O98t^;uf<6_vP}Ns>{aCh-7hKa;zqA- zFBK$!qI1f(eJ*QZ&U4Sa;65RmF1jKGlRP;p<6nb=m)x8}9%S;bnM_Q^bZJ zbf)wmDHbox9>&@3wmbw1Mhh1^wz_dhjZtbw-oWnG#LwreNNmV&h})2!BYF&K^TI>= zD4$g=j3_pdW*z7bojWyWDjGp)dl5I$gi^k`L3om7G)h{%ZNE911Dv*T4Moz4s^P&a3aV~ScWbF??IL}U-J#`7&e8yTVzRVHIns99 z;M~VdqvTSv_}hp`mCNO>bF7y~O^`+IQ=hg!GPb*+TC**~VkP3=r&_BWw-hFN zC7mj$!)_hklP=S1oLb{ILt36g)>m>CZ%S!DmKUU{!)8u+^4aoXD({-eenin^a4#jL zJh_zOkuJcI7)$T&+^UsTzL*4@nw$3S;^z}j4W_$Kl0rrX;=R1FbM-N8>NdfeFI?8R zeJf-OJbKTHYfbH8E@wN)yz#lwuJXpg`Nj9C)LqDz6bNnVJ;>7bJwPXBnsA!^Ju1RVmo|EJ9R8IaTla@*RAK|djmH#ZAfPLA{4&S#?P7^WS!8k29&|? 
z;W8Af$Lz@azR4x7ds07fIbieIdndElt+Cm*qb_3;QwwD24sy)1w^%Z*eT;fNt@kS& z_kL8L3i0Y%K5ELB`Om^OKp54{>M@Nl0)l2Tp6;6xB9Vkk@$h zVvRh~T%(D)Q16PeaY~=Z`9Uw_1gab;}Tgitz}q-2n$h`DP_(~hKLGFB(szuBxJ};#>_)9MJduUWGERH zRx(RTq0BpHLVd_Et~mV=Jww4oGj zht=-dI0(iZXMfguV%02xB0ZgOWA#~Z9X>ufinqYL`l8c)?{4<0x=QVME(aHyJE%z4 zc|>6U628!-8^5wV3D$rO=ZUW{;Z167nY=aJXKv|3=N42W*p;|`O~oMjUb_j_z2cMF zp_I4%GFaiqOdI5b)LUVa-ys&_uK`;aqe2I<2t$tgT_lxd|52h_`1TsE85cm962us0 zMmD}Vl^rn(l6(2MgkN|vZmGx}XFCRRo~h;KKE$<(zsu|W&NzMc)A@&IXk$C-*&a0M zd_P|NkuuHIn$rSBQU#dPRDZ5_v@~bre#)nb z$s2B;seFH{`p(vd)=R`Ue*>dQC6Y3Pb`iXzovDHj?a%X@um!8cPC zB_A(jC#~|JKGo6ubVmEG;zD^N6(9dN;yR;u@lSq0&6(N5Rj|qJ8gIDO6!AdvwbTL3 z7SX^pxrA!bru#NXM|F-Bh^w#d#Gdh~b{o6(qBJY_!uLYjQD9V1ukn4;44CGH&$Ma@ zAZ(LJGBoPtp`6Uj%cI<%6`7Mc)t`VWPafO5a!KlFJa1&COhDtt)LMPT@a&Q^n9(J_ z?09kUUj)*;D`Y*#NT|sxRlUwtx@}oMD(%bIn{pyylIwSsg4YRK&86E&r`3h?m?2>+ z>PMdUqlQ0Fv26eR7*X%vohjuv$UHCIa_oeSGgIyt8hQ?E9>ts0mZYFgT&~xrDbUQF zP*%+Kf2$N#EM_(jZTjCJuW0@v{_8)GxDai=eEolS^;Owe`hBKHc@x^a>OYD+q@^Vi z{60Elv|9*j8`~SF1`yG?HQ9(?(e$mLO^klHNg0y*2_97jyWI>+727JJ2WY$7WBK98 z$hZ$FpcQHIW(l_)Z9H7YCcgsA{3iX)-{))h#TiX`CBX>uGunfC2 z7oAn~0d#9c=2=VMyVx#f7yVjr_K+#lEq}2LaME1{(+lVXi`GUCr})1R*GI1a6(fyBWJX`ZAxrlmzo;iD>(L@>a80^kIV-L+O2 zId$mdiGBzDii1nb>&-i;p0A0z&VDyrlMmsfsYJnKcrkM0vI>)}l2!F`>ugTU(n*iM zdjVdCQ;pM66NZ&#sqU65N~ceutA2n^xFYkE*?r8!chFLiIW)uD)`UKsQONYdkR=a* zO5jYFPo6&WWUItu%ZT?!_T|=Rx(`+--1wbzgy~DwXp;7F+QaS>&c2NXDG5@!M=oWO z*&`!7w#hmAU3ETgy>Zuiw;8Q)0?MxU>uK(ORvA99gX--sCZJsO#wcQ~cwi?}2(XUFVXUe<} zkdz9xba35@50J2QmJ{BX1Cg;GY`C_a(5k@^L`>3$xc;}7P=XfJh0@_J__(NI!%WE* zVv)`tEz4|u5$voexljEaFk0B?SO_}$3(|y#w7c??A%glTy0Lfa%LHwIQK~Vf-%Q4f zVFwc|oz1P4Cc;^4^>5%CgzlUmmaxs9ZsUr4^QDf8$lW3HXnnxbl9S(oz=+KuNs>87 zO8AY)oleU8E@swps7yN66xi`|g}90I7vRIkbAH*ivze{G+Hlhh+Q^+}3_lGQL}ATJ zmpf$jQy=<-uRrU%5W2tA3U$!j-YH4pF{*4mVj}urtK{~*NQyoun4>+n4E=6PYT9trH(1+Q!hJG%(-2(OCK@(K#5A60sQBrC1Z9V5(W2(qQepRADVW^V}hSp9H#pVe2@ zRP)(%A#Bw3lTwA0A%rLTJId@Et}YeDCc#cR8PO=`D;(LPl%A?c|ZM2XBLX 
zBxb$5NexAd>0D%el_0COPF<|RuM_Z5r|H3DB%0lq$-YvFJ4eOWt&y6uZlM+PP4E9L zO{|X``1dW$=md25W3(UA$E#6X)1L^n)+j%uW$e-}j&HpzXD^ldw8)nwS4<}bd!}`d zVWUbUG&5ez`_|?+XM(tzl4?C_4lOypnJCyL3z1hVDQ#}63Ii;7%p$+NhFp{gpL4eeZ+7g9Di}kx9Ah*c*&B_X`x+}w zZ?gzAeyqDmo2fV9p_fORf&jVg36J_=yi)(|vFCMW;eniV%o8Nuc?`G5CX;4xIm5bH z4b38u3;0d*T{O#rc06Bh?{PdLd{AeV1o43v-91Z1U2Cc+&kq_iY^TRTcqt~X5%P{u zelW$_I9@*Vx~7uL{SM2JD~zIeVEHK;vngiE60iGo9(}|SY7nm1Eoyo9R{SAZM$nXZ zap5<5bqI5Q0I%up@u_wKaeHLM3kbk!$4+n@@_ut{GjAn=FQY!=mVn-L&}(YW6u6%q z2Z`jiw18^v5dqadN1b@prqKrtzy67CuoUpq)-ck(-LmlHo}y)dP%oisN{a@cjawQw ze}9Q)rR+Hj}Y-AMtNI&=LYHdOi zoGfZ8r>U{!QIedC-G~~i^hd_a8W3vOgHgxz!HMJC`thCvl^kReoG_K4b2xZ; zR>$kv6g79J7Dq>Yzj61k;?cYwL=9OmoX0*u{Av+b(&sMFhZZ=UupVZ+1#0}O0q!xU zw0=+gPSk2xUK6S05c=0Fsf3gA>fdD3|BUxli(v$6=1e-)BpDLUI`cb8DW;In%w~1a zgEDqH8iO7prm@ob*(l68*`!5mF1vcO=PiyxV7W#rtCq1XGnUrxS|Idi618;k(N=IH zz>4FSHB=}c5z(Y{C}85Ou?plkP8JJ>mY6;$`&>~U)eDlkz`SSklqyzF9N&`_(G83X zDUW-S8F|!S5l|AbylwNcxrnD>~nmp0TGV_CanZzUBliWcA0Qy(1UdNf z!ynoNgHue(Kdn{j^6yo&)kU2xZzI&%#(6SX?|lKM?yRPkcpoknZ;P_Le|heqkkOKq z99$h=|LS*V>(g)WZIa6dMve)%w>ya57dj47Y`Jxm{ zqbF-f>#_t9MS>9+tSkJk@uWW#^aN+$`rIS)6ZgL;^(jVfD`O_HCuyI=i1Idb`3c z?*jdnZX?U}CcPL5*2wq0k_N~h!r6x_My=H`CGe^!<4qCh<8kKk!;kDdU1kW}ZEFA1 zZ}?Z=A^%&Tb!BGE;~mxWTOr4TBHS=qx(KP#@gy@PA!YI#FUs*}MP6B*u0!lqCGG|egM5?HtG-_8G15j!hT4D#=!w~>dkn(S-SRDW0|4tx{XThDJ}iQP1_@+*D?Jhn)a+%`>lN{ls;TFkbjF>sd)7tXCaW!>fnTnNYXOq4N| zdZ}E}?{};rko@BGJZdHS{r!N#<42)Sw8@6-fpE2r-(aO0dU-!g23QX^YwhW8%_Zq!uofXDQ`gMa)KN;nUcj6En?mpm6 zgwvUEok{aDp-EkHPJ%D#@S*~N=ntx}D*QOHS@W{JB_Fv6-$u1gw|aypZtS|Kz|JLQ zjN{hQSQXx7fY)47|HT11E0jxx-kV{qpW1n+J6Ik%9TL$rbeUtn__44YiK?_&q1<dM`onEP-IZyXyD-G=FP6wgDmRP zfMgmX5VZn21Gx$^^TcCtb{3jDvmnn*au7wyd5SMEdvjJ`{`w#$i5eJlRywBiGi6#kKCI73_Us6psa z@mg?%9UDLkViRu5X91bY>H$vKql*qcV-Bv=JFPlvS14 zioJM+f65{JdH`SKtkA)>57GL2l$hd82uRJmc7TD)KJ__V8Z63JG_7)fkhYmZt~LgB z=CB;BT=FGrt+#l$e!uWQ1fq1U=~pd#GXDQdMWrIcGG$3I80E^qDJ z2e{8%LHYW*%_-jUWfD}`vb4JU(K?Lb8x#QXVofR7#oT~Db1$jq-*WF^$KQHfo_opk 
zg~?Yf%3t9Nb?&W&qpi3GeY3B7(AQwqJ9o9=|1a7;9|cl;wtf1#_hk2BJGU{4uA=A{ zHqHqVH>pld+qFo(vHkQ$IM0PJqZ*fPUvWTf|4_9^*wY4`vmXg7s=k`O1^e}nY-71F z&u!Wt8OgUPCUL7%)^vN#;DR*lRk{9H4;(z;!t?dh#A_8eVQ0&DDu{OocQ=tlF>_nA zO5P7;V7SHibugv8T%jduvC^G&E!ff_~@=5fm{D2D)R2WYbJ3lWv=zqvNh=%^m7^d+$4o? zR@8KI+2Sb3TxYTG(OeaouPM2_6AI6?Bwnuh5lJ1Yta4`Q6CV*g;_THrUoUpr;^`XJ z4XDJ#xx37wpTX`u#T@IoZh(zU!Mb30>H`Q_4KT3$1~WEU{7G%&7maTwC8vU~Mbb>* z+oC4w_bgm0B5aCdz6)Y_k{Ab$ZGi8tI_l8z3Ve;Vj^p(`GQKR>iXsB#t}GSe^Rq(E~AFVO)><3H-RMa^<2R1pWIoC+e^(J zpl0Kg*j(mr)fZM$L8dppSf;LtkJ*xK{sD9C%#uR^eiKm{Zgsa=@@Dg&B`564nm+QV zx_=EYCP=n)%JJ+?S&GJ~^9hXq92uj&{znW3^01Ij1Sjw5V&cih+;8EyObJ_Q$%UoxwHVtT%WB{aTu)l#O7{h+EQFn)p2Fy^1Im}Zma#VgrF~N zp12R{G}hv`)u__E)i)|K!9gF*N)zRz|QqhjhQaXZFmVfIPz z3i|ZsRXYrx#0|#{I9w-`n%uc(qh=9RNSn7w38e{3rh7%Lf zd`}*#AQ#3q>jnjtz@a<|OS!<&dd!`b0DuMXO=Yp}lU>UIhgGy_rUvQym$PFjxo3Z! zKoQWbRT1By3UvRS)20-GY6AZe@bBozqkZ>@l_ApRp7) zP(sstG3mx#u#V+x&TP5L-{%M&vRgWIR7rAnoCVm)!Rl>2-Pc?E;Z=lKgOof!VS^X8 zgUgfm*?&OkEI^4xML$62%?6F_f4aC{K(aV`%?yJmq(@_wrr~SnxCgV7yNWR8V zKQ>0Mp_^@tnT+r6=BmkCW;pqeE{*Rh%pXnsQ8fyRyGCi;nXR#Gr$>@vTZ-Z2fWC2Q z)iU;nx~pq(X{_Z$F8+MS|6EJKIr8^ENKysyBN|@yj{z5+T#-ldt=gz5KQ05h$PhZM zqAp5D=!^z;dJyS4mOXIa{uP1~U4*pDuZS6jNw{JkHboIiG>Iwpf${D7Sb<*wx^9~L zix{TRRVznSM&1(9T{TiRTzL8b(9&?Wpx6oF%ln9_Ff5!)5qnktSXCJV6+(UJ^-^7LR2ky&o7SPtWR z&D7dMnulqxha)#3-o7F+;uKi}v2KnSY7fvy4u*L@!BB$8EcVr?PwNqWW$nFo+v6kh zk4l?juspHoel=kLNM|XG<7d_I2_W^yn*UTZgV$gZ6CT+D02Ht9QZvc zw2W^mbUKVmGkKgc6ZUy%g}MoYL{8D_*W2<{lDUG%Lz!}PrWoZ&DZAK!iEopmS@KMx z6~?KT+%vT2J^_zEp`P!t7UArSIeJmskZ@8C4W3e}#i6snk7`@4eue#r>beJ*xDI(qdbwmqe!u=$ zBa?}da0fTa1MuTqDH`yRtQAIAH7=0k?A~LHKSTY&Vc~HEuLPS}%BN(HOtatcrJ9ph zvVN#?kBwS%`{=isq+Ox;LR+7aR^?*lxsq3knO7*SD7K;rDe5`|T?owQZLj-&rgh+- zr0<%m#z;v^V1Lf*w!RvQi04)LEPp6q*8DRYNjk+#b{)TBsX=?}f7q6YyQ!YGMCm_U zGz2{`_q|^bF1;zlY~4b!qE{!PV~bU)SZ@jWqJpTkdnXh4FKzS`lWW3@ zdtENKIk5;2!~$*7p()=`!az3YM+$Np3vV8lrChbNlB64fzg#$({Y7Dli4CfK*bN 
z0!O%j=nt9tt@~lWle8?`6r;D)wOKnkegan<|K?h!5X^s-h3J;+KQ;uuIuXv=-g2W3s`Md%IWc2xgOV<&8U69|QrUU^L{tHaVMas{2&%&8bbMGDrTs4UnSy^Q2dl zgwc69EuAk={oFop=}*PM9xd_pw9O(hL9;2g@Y|e-)m_zKuFoTg`~+cxw-(Y%arE5{ z0?OJNtz8e(YXcQ0w@KyTFDfO{5<$~kDnT#vtIV^Es0TYTdt- zITv6caOIw6gp_-KHz;m zglixMOx7(yyvC)~pJ&D@j)aOFWnBKv=S#Y1#p-ap?bhL;u!f?FHG~F_P1A_f#MvHP z)UQi;DAY{42q?MAV;*o>JTzpdo+M0*nGT=i+8IEUu-EvDR+zBiMEh3tm%Isb1 zk%(RG*v+-r$36nfOQZE(ufE8mA4@TUF3BAAFi(K!VUxr8h2Wr{O>>8dX_k@d*7Vv` ziQ?KSEb*_vtXa@_Wt+G~L+1_m#|HM}t@Qv1bELgkf50{PLtN_Zm&BU(^uX)C6{mkR zNlxc3*Pgv?ad2PTXP1+QYck{$oL&|wYw~JY&(0O(t+}wrbm)YAWxW<{oVv}u0mLva z3-%PU3I`40yzM<*;79yA_2%s%4_z8Nv7A4?o9&e>Lu(`@u=7kqwfr(pWL5<)>qup| z#aE*H$#Da~5;C8TpECb-XFG8*>O(NU`nsTg?MBwjGOFX_D8Qy#8BN8L2+G)is2=t_ znS>uY4s8 z|Fk~UJM7;(Wr!YfoHJiAaeU9xtYG@qR%J*#L2eRdSJ3ZHm0n2F3e==P-Q2s;c<-r& zul{GI)_ARF{X5(ill%v9pH&E>+{yuUY<3$5^v$BD3F}*3q#pG*fGfT}#EZZ18Cql; zL37rNmCuJRcKKEHR-dx$*7OZ#tCw8)3$v|7+lo26vG_rPKE3H6if@<;a>xtWWZL!v zMMl?D%>3QK{S~BUOr+u;q+hLTp@WmV?mW5;vt$`^;o3`SQ=j4Eo!f&sm>Qes$DCW3 zk}O3i9TeXS8n+-C^liMlAT1qwAaX-1^>wIo@)gQOz5Vywa~#nYD<#Nnpt>tYk5INxk* zCqgk35ebK^oT&VoIBr2QbX#J6Tk6c*ZvBxydw?Nv%i_+Fac(}=&!t9p?#hveAqxJE z^vW=ZjVehPj#d1^X~d95j^&LztHQknuU)Qh6Q5xe#ANKl2QhJ-C^61%4-DS5)DR~t z<^15x%Xm9<$!9EC0u!_~8x&^J6?yP87Ccou3PP_$F7Rf(RSWdR^o*U~c57L?*!fL9 zWu@?}#?Ki%0fY3`jN+*-2><?#Tavb(->1w-V98gXuB2$Av zR$JOHlm{?zA%gpuJ(jEo=tA=0#l@j&KD8yv-SLmOCSkK=9jo6))WtC(g`7Y?q>Afb z>~F6Wuw}*<*PD8&z%Yxvz=HFbJFDOh6wC^OSeyn$d~?Kge6p>_!4>+TKdPz;%|lH3 zi`pnwH*;O9nsZY@&Db|TAJfpi1rUo=#EiTO{)T|a#p7zjQ)`=&IdXsZ0w!-=*I}OH z=df?2?@DWO0Hf5t*}DEvv;-~R!KuSbv*Aesr@3(5ce`QAz;eLhr=Ju~*E+;#q(?=m zmVbDvqr-Aejo9MFYP+o9Zu}=}7yTn5Yq&AfKhb?cT$iH>?DB*HfBLm6AIdMa>bLcU zFV(YD;B&5cW#09t8rmi~eU!Yvhcj}e(tFG#A$AE;(8zV-Lbs3l-P%}Siw*J1dOW;` z`hk8u({vTJG;NJ%x%UEkgzcXSyaKRc)PWP?BLJ zvAAA4^XzZ3P;)3d-K`0e`O_$Y`KIWe;d95&glWmwMi_ITEnz zTK>;!-JmLjSS^oBa*;4)m)WOT5U`0~h!pz;+iKi*G^QH(n1{%Pu9=Y@hYr@Zp7?3L zNA_fM^WnRteOKCJvW;MgBu&(p?5u%K)d5wEnSW1%oj?Fxlef+rX2xrIU>^t@<{)7l zQ4yq70B;NuE-)|ruFYlLN{kKyyv0A5DnlkuX***73| 
zm=UnnQ->2W#RUhTJ>;7cIZ==|r}X7ua_s9TM@?feDXVANPhQGVl=i_r^!{zz8%>#Y zFB$)6^;_(WecM`egF6fi$_leaQpoO^=oMMJA*M%S4Rs+`*piM-jV8X1_YGexhnN#z z?`YMll1@9iuSk=jcP88GAnTDmlVp%>vyvq7qoPiYFUB3!JwxuWY=- zc?X=rv2!?y^s;2Cs|cLT#h5l;@1eBsHfNmB5Pjc1;$_ZsP)>CgT7;eF8NMGXd^t^9)fxiSAs^( zU3-s%rOE}(tX*;7d913C)XLL4CTqY!7lJR68fsmSzDcxdpH#LZCkpf82n!QY!u1vX z5jw(gb_4DRBphaMLUA$m3=kraT()U1WU6Ywod3m|!2tl>-B)=hvEkCuUvoB2XP~4g z*0R^;6sv@s>z3n&vsOBk)}gO$@8IN;=Qo=b*`Xj>8BKxr=eNplkUSJ>QYLebX<_QM zw9oOTpEmWQ(f4Kns~>m8w}l}HVge1cxexoc@+AE9uC}CHH^_}jZev{+s8M#{-h-h6 z=iSUfad6=NRL;dDjjQFq>Ef7B+JsY=3&$Mp5Z$v^)~!f*)V6BJ>d@O2^g`XO?F;H+ zGLiF8F z>{!V>o0;U}=uo>2&Iz8rHwZxwuXlU$j<5?*ZgkFD26AQE=qBG)fv586Xdi%47BxGv)BUrJv@28zR%`OKzpvJDh4AaurRP4R{HSWC#mbdi= zL6o+2+3rnz`#J3KW!dR`daS94^y3`CSVg=htDOOA(GoBBb4K6<$u<+rW&}~20@SAX znYo1{&2#?9%~G6B(&Z)gpe6cML}5QS@WAMHAp7{*K7u~$)B9gCzJZhlng3}WW}>bo(% zG-|3k5n5B#Bnk3nzFHUw_pRyAuV?)I03oldB*sx>*GtApkk1CSx#Dr>-7?wfe0A4Q z-!&{*9-dvbyC13bLz2<0fG_$4WJAc*v^iMF?U{I#FdOJmcpREc6%@UD0q>l!fMCv8 z`IUP@W{!rt1YIP{dX>VTs`N<8?QO#T42k*fhJ=#+#)ce;s>;s4HI!J~j5%h#gK{^} zVRFzE3h3ef3Z|35eX-AgLoPikZs~-%vP4CE*Wo4Jm!ss4Qx%=zOq|=-IX*+D`4#n} zZf?1|r^@Gw$y0(Gl*xCfpP}9m8|#6S){Z-=xoFJFW?^Z1OKzDy(d1g^oWG#Dxu#?E zhpOX6s9;(LTPc>bk}M)^QuJHfwaF@$&A$^_a*kr>+Kxh zf&+9dgp8>mMvG75Z|CuUi_!Z-X-ocl9&CUFOdn}~7HhVDA6A?Yt^@+~bLOyF0OeoU!3tbZ&+D-!OVaz~Fz(FdWFm<3*` z0%~KwO*=l`L{XwN$3!()iwA#%$d~p|0mYEqmQ}xMYfeLVvwrfDFRNDMb(&aoG?%5X zMjsyXnRKL0=ewd4&>Y3};#&u?bc3l3PWVD@U>7;Zm}X;DDWukbW|{ZAhj4E2Ep-=K>z`R;m~D&$tq;>s z%9C&M)5);5q+h*08+SPhB}a+s0*By9jlCbF)%z9G>5J5NiUPVZN)|`jw{4W%rDT`S zY08IBmAmZh2_!1n>S76(xO^O!rZ^c_?$LGmJamP3tOhzv%9ktU7<(bpC96Wro;zRV zU!(V)?BHYITlt5*>EHkUuc+Y?A*6fv_R-^-)HA{Dsx^l4VDpIAY7C$T`;mU67Dm$% zth^_~#a;4RNykQ>HXu1?NZVq@6{?p=jsv3SwmUWBT1U2As$TS~)6M8#JjXqWs+s)+ zwM%^M{2H6OKjCLPCB@M5;mQMDry)M&M9n8P)d;pq;HGts2eVPvpgPSQ7hgo2uldzt zYa~y7cDwv>9WcFuJwdU-c9$*+N_{HZqX!_VJ8;3suRWb((*%;uw}ihRIXR?dEeM0 z)~NifD)h6J?##pkeFmsgn7F^v@L;%+sq#naFcPe*2mmq$(Wq{r^^pJv&tK;a<=cnV 
z(WRvAx9QALzKQjmqB?PREFJd1ZgR6!TtJqNKa8-h^T6M6_1T5N)-ft1aSB$obRB8ma zmBP*i&HQHUbJS77JFyFL>Qq?gUBKELa;q!g$J_26!s=s;_zbO+SW9u+owoV~Q0u=V zs3sJS7Dfj+h?HAWaal12-uT*Ia1&&RoFnvX$5vpm%I@%}^H3dI=igz*v6T(;muOzU zSi+n112HCr?58okAu6kR4L>drppl>QED%IFFZn5p7(g;x2`chFG_Jzc6b6*^7z$G{ z%n`P$m-}*0F{o)l*zfA4a~@)u6rr4uOyo?^QHp6^LsHu?8C%`zFr&A4fywo3jT1qt8|Q z98X<(H(MxJ=zMqwY6^W3TN!L}47GEFlDDh?C)mDf`PhLig^AA6lF8$(!eW`-y9b$j z&VwWb&Ry{wkH2GbHSW>V#J4ATDN!92y<{&|Q3D@? z*U;BdyPC0^7Iw^xO)=7^R&$NF8lKvYOg#@_%Cs7{RZ~F~iE8P0K z7odqO7~Jv;7r?5uz1F*TneoY6lf_s* z8YXCMP3m>rzXdlnY5X2vW7{XoQq!lN!sSNv7EAmtcKVKG(^|%Uc;R!GG%?6k5`@LQ z#EYjPCCnfg$qT%S8DgkFpR^%SAu7|(TbyO5ZDtlgjw@AGm-c))HGOK2I=kf9hkcXc6hiGMP7zvF3G9=M0(tB1<`E^EB?=k)Vpv3ir@&o>lgN=`} zCrniX!qp!{JaOnj{{*?mcQ7D#$|=Jq$2^w+Db&a@e09ZlR(0w!559}m*Gg^08@i8o zag1E7h~9v>+V-rB!IqN23#qZ^D*Yo2O*bjmv9x$-dI0o!}W zYf5t<1>s(Gp!4G8YxBUcUyPF?Wp2aFT6CxzrWyN0Cim3{BiOgN?_bfmHU;9}BP!He zZ_;x_7(>b`J?0YY$uYSCD?(H2akwZo*hN!U{4^NYrl74vMFl%7&HsNBYbnjIxDiH`?)*_ti^e?99=F3y;XzIgO1ytfULpeHH?Xc z>och@3*s>3gWk5>$0IrPtLrnbNjY-9jI*Kg>Mlmi@nm1A84*_#w2{z%R?9z@5QNNM zgz)UY>J_nMBq3?l?%8kc)5e2h*dqfrABIHx&GZbeNJ1c zwwr`gYJy>35io4kA4nptkYi9o(>-X9aZ6q}za*)70|o=nj89{On;<#v5Ln{|4%95! 
zSs0eZ@!Q+-r-^l4*#)}#HKH$|T5V<0Mkh;0T}}OF0Dwh?B*tM-ySd}52e;h8m8m!$ zyx#@hbJ-dPjgl}v3NHQ#Qx7<7Y+A2A9eqP}Rrmx{CVKCX z98H*N03UL<`!n2$TPN+2pa>7?A?^8lIc67~g{G|H$LAo8GXBQ|K~)J7R$ZWNR-!QA z#j5oeGWF{(NQZ9UI!I>49>uI8Bdv;#Xn#5{bI3NDlJbK)d>gI^1j$_K*LESvDX#0& z_VoMYtGMQ2^}YChu9ZV;Um(K4x`7aU-DRbnU3NzV#ME)!`=fYnW{!bAdxe8rC8%K~ zl-l&Oj+ue$198+Or$(F*nbp-kjvs5^EmqyHKD#6pr(1R!Nf{t#I{cpM=5WNHHugR> zHK@#k>qk=t{OQWb`;o-G{Z_>A?{%J^Om(5=|ENP)k4T=M5Y#h%Pj$806HhYn8YE&P zBoByQAS9`4hP6L~))X1>21naNd%$x9HW9ln^Y^3aMKAb67sg)J4n#WT`jM4?|Fmnp z#TnZYy*T_fvBn)_;0|L?@Q=k0b{Pb>e;chZ;BBqBiL^@?AlC@Kxb8hAze*%k343sF z4vMiF2m6tipxbSWT{5^lHF-6df^tzcn)>4lCbfN2RI!-=d$NyGrB!3ba>a^XdR&@C zL0NsZoOhB9FALAHC$f%HjVCZ=4RAX)1X``(j|CbutuJg()(Vv!8X|I>M|q)F`B*h` zT3^iop1K13k@DE(!=4@dk>e-3C2qT-OE#sVb=+Vj#9zj%%0-&!A&G`g+POS`ajixi z@Kq;k&qh?Vb7zJG@&?P@x?vO1?ZXDLwHfO`GbX zU0s^g6gIl@mw0*gk(X1RF917PfXHF;;g5NA|MB?IlH&^4B+XQm_1Zr)Kn&(f^! z0e*m|vG-n-zi9mHb5(mVMq&T`p?~dH|K6$kNeD2Cq`W-)ZWNZ^7(%oGr)sxfS!L(3 zp2xaRN>Qozn?VBTiYU{2rAR-`16JC*f-bR%P>N+yVCT|d1ZM%B=hcK)>D@Z6?&4J0 z?x!Gq*~;SD)90%REihLp+Lkdo`G1sRA(a>-wf{N-x8dt#BqmJ4cg|+o_x{4)>*ybQ z?7yp%{y5q_*xonc?)V9&*xy0zKy3QGQ_5NUaMk1w>~-HEhr0$6^g578jorT6zwHqA zeITe+HX$9#w2`vs>p_GW6bSo)$CpsduSF>AuV{-IL6E~gK9-MTi- za#Ubhb1`O}FX2A`i_~%IMlSY78f0dx+Z9EUby&5;O15NvJhT;qJ6U`amyIMiBv1Tc z{78B2HsoNMjs1+pjjN$Bsd+(rbP0@pSyqgoDS4w3uLRM z1CVe-<#Xr3BaB}l;`#xUd13Y+zjuB z*Wze>6%zSQb`$*8MlsU)zHmqXH}tQa05VY{lujT)l>GZ!i=&c-x5b{nJLh2uhb*FL z)Xu}3l|ra3usf zA;|vP2$Fe{g}j9)WGSCK`+FJub2X}dXECB%L;r+o1Bv8CQeZzKxt9&iazT5(yWlZk z_QEK8OloRGS0jRIXe2%bvLxhfQ(b%fXM6)fLuXx;N3AUk)d-*IOl<@yYbxwsW6M5* z*~saEc6=oOVV+uls-{g)TJ0b#`ym~UPg5}QtN?@AM?imGK_c)j-Tc^YdeOOG{^wAGoSuvTo9ub#X>bu|hA1Tp|DzV{oZMAJtkK`uO^{t(ryK{{h_0Zz|Jb2%dnys$hEsZ}peMa7n66dm4(YYQB`#sfBx$(#lzDB&J^}{U3N;p+&lgb}rQTg$*15No_h-+^^exh9co392R_`@eb^}2C zfu=9ukdP4_uG6juwnJ-`9QeXtVx6USD?F5&*io&Rm!q z0XI+8+imYF$7K6`9D#~zvn&hrsP9rfQTyB_Kk3*wCKQE^;`rx4&OWBBynZ1#{N0{L{XC}8o1 z!)$C5zglYlTTvI=SWB)Ze{;Q@7TM zptb#kfF>3J`tK4E05+h=VoBst9C~;rYm&y~ 
z{-G|f_6vH>aP()>gdnwbBonXIN++W2*o4}lYVyMXwI-BnC&<2~HxE34V((uknW`(s z=o-9DPhyc=o(;beng8`Aj6|v4{p|?o&>x4z7+WjWb5AD;`M-=Js0$?drE^R==J=qdacU~Q zv`i%C+wDKQmrXw*{dCdqfDp?~6L+N;;nOlsHFkvlUsWXxY1cTNfvN=>B_+}Ol*y>- zZX!go+HYh*Eg^x?W_8$X5sBdvx-_Z9Oj!#NUu8AKctYknY%@K2wI8hfBi>B2hr}?0 z?_8{MX98L~S>8R+j&v;6|FpRX?S~#cP;tC0TOB?O+3#i5=CrY>=)xN_AX|O`IHkg9 z(W8%1sfo=AFh;hfd|+&;Yj!hFf3~@U_6Jj;`Q&fyg$PL2604& z<6#g9vW;v75J3M)`RDll*0&-bW`*n)(rV&t2l>gf3NXSImAN)Be|u+Mco!y|wl&BE zd0;!XM|GreZ?!9P1{B6|i|iKK=O4~#HNimT?M;xJCD-B(s{{oM82p6x2okItv#T-{ zJjb}F#h!XMq@vxa#VG}SpJN?%5Dsr&I2K?{I}hDmF?v(8;wiKhXS>7c8Ybpy=W?A% zC;YAeI$=3S$khQOM|N5+ZVjIWcj1 zL9Ah!6xrT`AhABfen9A7gE#vln7kQcTgt}~_#oEY9y2HAOTM}VF|OZWNM8#c+z~y| zr}pr(RUn~$3P_Swg+Ku4z@=`sS-*}!4#zr~(2P)aog|8aZuqTSkd59l3W>cT`ZmJu zWdVhC+_m!(3)OI5d;+M-H7HJ9W0Kc|)q(@x8J{+Xv(N=nVH2KJ?JShOroV#3c8=ut zbzl0m`=hBAu&G^)3SaZoGEzH7rzT*-mmxFFCnu0y4%r@GsD~~hx%;cryR8_}a-v|8 zNGJh+ZwumE-40n^Ib;{+PhmF+0Mc6Iq%3ttt-%!X#m`1+6*-buD}!v@uHwsR$)M8- z+#mU*c(z_cp)~R}jAvnqWu3NMhdp-FDYNCH$*(qry-g`gs~rw*)IH6Rc&Tjx4?u`U}nEu&xx zQ#}HQ*xfS&e;s?%ROp`6i?ofW5e#w-1Z9#7WrF&~vl4#9Shzkn!5gldOQ1yI-zL*m{gHj}%t-|~NBkQfBvP!%EVF}@;q#J3Z zTM>{FX)x%Nl~u?bz?J{0`7PQ^ng@X{@W86^4iusj)u#imwC7pdfE7WuzDk|V?TIN$nM zoeX6uP0Z{3i&jAv0+aR7jW(NSU*4Zu!>UWZH$-qvs(*Nl%5Lsx2&P`DiD!ZT{J+w}j%by^0qtO-tdI;;<-nWUhrSK22+&y7 z;t9=bZ0Gyvb|?kYw=dPBC|GmQ|1>tiHi)Lc=D4+ZGLU@UIUkxL&VW*TfDW!UhhLx- zS^lZ=Q&3i6`+UUJ#!90lMcsl#rU4i@$_|y&D<7SZbk-3O@Kwsud^VSro!nOZh2Q~h zhCDeBP|WiSxEDIyQMb`kuCRG!sD$|uW|y`nNrp%)>G_>Y`=HWG{+vR7OT6{4)d42H zo=8W#b6=JQbOMrG>3pv1r8vWJ_tEb2?9C-l4Pm56urxBdGOrP}tZ8N%#B*peA++yUoiXjsIgE zsLQ5vx!djH|8Kps$-_1 zbpMy;>H-(|^nbx-xZ^!2jm^celOb7SwL(0rfWpk+sOm9m0E=2Hc48{ZNQ>G^ezMpRF*X-j@%^kp1N5QozPuU7Il zZQzKhR{d@WTZ&^pMQM3P{A!D0Q|OumGQgBpiE_o3!7`?zNJH(vk`|;p=EJLTf|l~6 zN5?+)*tyFJZaI zsDVa!-tuW5@w` z$_O`U1jB-YtN9HM_O23>BP16rOtWU9NH2#RMeUtId)sNl&GWD-+fy?XHzJ*lZudd! 
z$_iyLmEt)JJ#M0|2bvVtvhS0%QnFv_2xpt&xcq2!x@z>{-P5CZZx)gl(P0cA$AHcC z1$U$m?^3ed%JnkHZA+kiFH5PGDacr7Fu16=?!O*>Bds$E)nMG#`tuTzGv@{-5r6wi zH(f?`*=k#xsIkBki(ICvWiSWW83?LR1&&jR^S!K^QhMGl$$qrnpL_pJzL%;+G;Z_g zrA|tGHbKFa<=1jwci8m!*3`{V_(wPUl3{&hxSS|+4$pta5;;7WJb5uPUjJtpLe7~s z*)X)se`D*DC=)Ja_i> zMO@+Ql+jO{ir7Ks?}1$#1F!e^qifEsm?*tzQkmDE87z-*@^~e#W$(feuMc}gX?-KY zH^6=bjhxbc-mdPnkmgjxNAO%Q#wlR|a? zU=F>Y{Z^yq8ANh-EQJqI_Q#M{v%(vdn&NA5{?gG1C3*LLjT3nWFY(Yd)wc}G zYGworO)bV6*Tywpr2Q($UkS|xRDj!7&-B3%?Yg45yUWQTEN2hk92?nvX@_>otwSWA zMDJDDlH6bfUu{B!iUOKxB$YGZqw8XoG!|iDUu+rhR_60KT5ROjE~HtMtS<`Zz$kcqSk5Xm~5S1X&5%!VW>0m$y?31jNZIjTQepyl2ce=Afa>^GEYcA`>A)OGv+)# zUjY&=7*-TlUSZ;ocIn%H9Uh(*Danf6@tWA8B9;%+Di^E&y1_+xitxG4)NPYWt_J8= zKc62hxY@FJ0D0uDj%8J0RTt^0tnFT9Gg)AdfuLxG0`UP_ssEH=;@u+NX8-VY2qvMz8 zYp0<1agf-}H#PQYoM1wE@~F3}IW)Z9yp5I290d5wL~4DaxVvaA76Ti_L$kr(^=~ii zx@%p-TT1UPaKy6>@%&)iS#w2I3i^|NN^OPKnokjHZ}US8Y|0eg&l{<3s^GIZ7b`c{ zS!`#gDyi|m72X9@+PW)E_01){vxGK!vbsK=(lqaH_NePC0Pou6Vt;Z%S_V6BM2+Fi zl+N>iZuMW-I#_>o6v1I6a?^>9+oapXV{ehM1+_Y2f{Q^)B?F-y#g}_`3$GKlh&iLa z?p*rCN~&RDh*B3bDBI1mwAHRn-E1bI^7zxh%6&Ra#h1adZ-@W!GrT89z8{4}4zuh@ z!H3&O*kz_b^u1OF^~cS3L?K8SlsLI8St02JrJrbp3ED=qZ%fw}smBLEnsqgjvR>qm z6rlKd-))lIn6Qi3yQ3t9tH0gYG2Nf4TC|Ch=mhCHF%+B&XAXP0zF%lb$AsWA+2<>Ar5; z$o3pZCgjgJ3>pTye5kW9tUhhi{bA0q-IS$s)09pH-&KVsC6qqF%=h0@Z3Zr&N*Ud2 z`9_g))_6r;|1@X+91}YQ%)v#I3ia1Q*}MW~;%%j{LuwI()m8?-c#;J4rHo%!?b*@H zQEWcHY@Yz2e)&C?NjMu+#wP?*BQybgv|`JW24UAV48#2@9GT6{-!}XIA#v}7$B6>s>+@UQ5&(Qh=#D;q7etw6R=N9ew?5%YRm9R+m5!H7X z$FyZ;19bGZ*6Bd+mHGUfHC!OVYvHjN=_dhQri)6I?=F~gzd`dcQ3v#(^v3aLhAoh% z+vvvmc{SVCml1Mf9-5#@ITW=x5Re4HNxre=&_TyVp_t<_Hyu?HM)o&I`L7b;j-6x# z%&RY%xp9$};>}Q_5AuAYw_OqQ`oVRzR*hZLyP$MhJf%Tf@+AJR}!lTQdT(fk?f`;5lMWbU!oy{tAYjJ$rnK6A3{sxE$&bC(kvM6REOtfj39IDrQ3(3K1diYj7arFdhi7iJa7z+4RTT+uNd(JgL zsy0JJBVDO;V@0+>@g@9Uf|>XB4xnCdCFXmuH7%R=N_zr$Q@@oJqr0w#pQEj-74dHz zn=TX}te8iJf90MAKf+G>_w*}=ZG;_+G{kuXqkR6Um}IR~c|}?~VZQzry4p29kj)&n z(oktya0q1`L4L4rR&E0fYNOL 
z#DefZwdNd=Ro0v>8?|$#N}YNSfm{$2Q~$W$s4NAN+%Uyg0QJt?lG;{4ii8tz5U;_P zFd;VOWYyTwqDl?21P?7Y1~HfEhREqNKPiFNj)U4OL^8O&`>@aV=bfV*xAmYg~vpaixb z%!4_bAVrbrN3j9&ONP^Vy1zNqz256eAI>wtBor(!`;pD{VO;EVp}dGCFEA{ zZ;v7Y%#s)s#qFGt6n+nW@6}RnJ#U2d^H+_GywTFlDv(b1_8nxuXA;p}4p&cIX;EWN z3G34`APr4&khl;`qqh{Mehy=b~dbwbOU;O)6v_WIx=G=rsC(_o`_pascEdd+wnO zKPCfDwx3AD7ZRFa+ff~}4Cq<(rWt)3|BGRp`E7o_OS(S5S62uA*)ZGF4DU>@UO)R` z?(~V1Yre-C!w3Gsj`4dE$GlXmGb`&?4f;G!^F6I8@s)8iAQ7y1Z-R)PZFZ!V#v61 z4V#5Ho%dA}FBRN0@^}L*(x)&&T1y}RwE2z=9;_FX15z*sjlOuaTNziuyC8CJj)LyC zyISZ9A?yJY`hE{EKBh|V^spIDvA({!2(t3@((p$MNUQ1$cuJVTMRhAsBf7OpvCH|l zPh3PI0z3;PnY@OtuVP0b^O}NlvB(&QypXO>j1H&d-M`wD7jZ3(?p&Aj769~TUrb3l*jEtCqRP7ZS#S~ z4F2r+oKM{#5TGYeC3XBO=x}@&!TONWVM`k`Ar|5ptvMOSQTHqvpS7~5zaOVkaNn9D zD;4(wgB|?PM#@a;jivxya8S8eMITy(PDjfg>x0(U8I<38k$Z3Czuam1cNa@euHx$( zv8MG%OXPoV`0o}Leg$fGpz4-^uYp5xftLRB8jwt&n>O`wH42>Y_CY&B%>zbxuyONg zgiD`4HQ(&uu>Y`cY2S1A>KEE5P6G>H5Xfm89&y#v2i1 zP3t}gEIh4iqDl)-X_;ks^K(>8rDYso5E5(zQX-gI46d|?ip_@VPTaRkvg1z9CltFTQh<-L%i~Kw#?om^;Or7 zEa;<^EXy;rr%w1&Nijt#$mx?w#^ak*F_lB!;H7fYbcnJKX->%Qj%-@@5N|zE*Qk+lR!+l*oT-x$X}f_xZuVhoVHz3 znMH~(jq2?H7gaNUa+EH9S&+Lq$BR1;`tB_xdc{o4US8zlt2R%^Wl^b6H2$_33vkdi zTitBKLq47B9Sq?E04d@uX`JyS;S%r(5V2l?#K>7#-0~gS*jXoJl`EzHMt%Vx2#Z|y zF&OKOLD}LwW@uY0Kh>;hpiX{w*ybYZ9RbW~QFZPOz?OMAjAx}#D ztj?r*fN{nMLQnh+e$dJIi?edR1!~g?)Q7)b(0?yH(ZWB1SwE^T?6Ytzf=^dA?7<(O z-xt@=9dSO&dJ@E)z1bE}_p;BCJwCvCj!9qOAxDfHvAT#R#R{*8t_fW(BbD(XAEN$Iuf8}V($AOpZBmW> z?^$J=VaQ~nAenJz;Obn*(2auBJ4Uy%h9Y~{f7CYaR1P>$TsbjKFFmnrUog^Oteajo zFI8O~t??xtYOLuLLBingj#0UXxmIktLCI8G`YrE?`P6m8@cqSsD(8SdnIi)}T{S78 zyPtP~O&2i@vvYaKXO>C92`&FE%>RhSbEpuhU{rR=9YRm)Lk*8kRm#zA>`z zWc{l2to<-A0Hs6P`%eDc2akWS0O%>YPJRJ_Nq7GxQTR7x)4~+6pda{4QoVIo$b!4r zTAG5du-9Fd1o(V&r(e*uDe7edSzZ$2s-iJQnN+T}rdfR##xd+L*8qE2tr)ajR%I?Q zL~i%fa7XGV(6PjOF9^DIoLn?A|I8Yj9sP9_uT}9p<)~?ywYl%B;q!-# zN||rGH|YX#d6O7GNlr8<$It7mr}c_&K2JZUf@cF975aIuN$S!yd(SE-g9@=yhX2($yJBSc zFV6NSaPU!g;xVQ>+5{T73DGkK+(~?c=$NK}nIsxL{=|RN`Y_=Qke=O9`Os+@XYi@9 
zVHTrwexvc|TOTn$8igx)wvEYg*w=Qb?fZ=^Ycv%pC_ISb*}51MB(_L7^Pav z8{WRfCcZ_-Y@q7q!*v0#GdF(zmK%4%FqNRGNwZYqcWWPSZy%c#xU#ZOM0{4q{RVxN zdzZ|t3&b?5bVWI&c&sMNDU-{!L~ilDH@&bEn%qMqx-|EpJPf5-mL}*P7_Plv&2CPRG&)n*TfzlhV8x{J_^J zM)h>dKCuAYRJB)^7)h7mw%5I>*{>pp`2x>v=GE8>gpN`+F8Xr@ddG{0s621=3I3Ye zT5FyVpCOI&k`kg{dX%nOz(9i!ib|JPm+U1+_B%piV#y9Ube>C;Okzs;AQ7M#nrLo% zguP1sw6bK}=z6i%6VWBOLG1YFa>*{?jL%knGm^zDcWl^qO#gcNZ*VY-xiRp( ztS}?j-fR!nZ;*W@#3Zy!IhIeMX46=EauAHD5Vsx)7RP8*`vN(~$wA8*oWkDH?{3P< z2y+nsgzPnr;6ZjbAYa13_Ez9Tz9rDA?)~GYI$xCn*TGKm`lUI*3SRF^Ui~=9F>Hdp zRpJG>VyZL6PzWI!8FQY#yfE0Sb$-jJTZ?A~jmJ)^B1**6?O>9YlG!KPYaUGxO6po-L zia2~<46I_}_xeXuLM^-5@7QAgOl?SYo87h+mTv&%h!A3v3ica?`p@ugTox*z`h{O+ zFgD$HDOhD6euKS(8@ub*Tk{AU%!X*N(6qzNOHH%2*wpTKZfzOq-^Fk5_Al2V#)cMV zFM;nyn8~R5^621ckE5+$JHTVq*&>Oc@)`DVpdDG=7r9vVbT?kl@h<8r;y9@UIp!CK z5=&pHTwSt{dY47S+MS>rdYAsf7fe3-g8H#p;2EcC5uaTSFZzfCRx;dAhxKa?S4eO2 z4~Y_ChGxXvFT~hLkxL{out~ReaKK}XCh=glLD~l_Ox6m>+psPSLt26vnXGSs`IYVq z(M!=+-KyRyyF4hq=l3W3b0&{vB9w8lo7$PIx7j*8!+nsAYhMrU#W6}p9;eq10%j&P zFa56P4t4na(I3RCmG#p%>plE1thOJD)ORmOL*Ra~U;OG)=rduNb7>%;_h>CNtM{0b z#6N|N3Um!WmIuiPg}dm`v1{$i4pZBp2hlhF{h>VE$Z zm7HQ!hgtsaV%GIvhSizoVSbe`LWBYD&XO@3*?L`y5rBbT z+m=KGOEbxHVYPbkU878wPLmo6>U7qmy+W0sB7jM0VMn$vc8 zD$H!bv^HrhZq3$wDQOpf8iucvkt-{+PRqb(uk8BgG<&1-XYP(M(i$HTa%dG(FEA2( z>3O!MD`mMD@m8U#!!G0z%5X6PHkw3Nw4(eLNN)d(-jXNaPb!|gDK^3G1etuB1P59_ zvYYxBj(FSP-w?DO0$B7VZsXpD=iZatPSCC_ zfboYIp49zb{}`1tI1Hi@%WAd3Rxfpow4|5{FLmli-YDvkKxD~bd~0_(-mZ*5iCsCq z`>F;Z&E`MSJT=LddDw=~^a5g?enM7w{`2vBl1jhD4@#b=WbX+4^L;_}9k?yGZL<55 z&6q1Vg*oVpgaXd?pov@S`QTK}?ZcBTICNBp?wL&8Vp#eBgVAORJXdbXpjXNHq%Ml% zxcynIXVmXKK+`wJ^!J*n`k6;Cp!b z1y8CA3|dmmc@}PULbe_#K}?kLV(|)2-jXko`Z>%-dx;5wTA_wO(90;uix#@`Cks+% z-%G>E^9os4W^h*+ zC`_opRaM&V2{{l0|9`Alf%S=8{$Pp(l3VK?LDgfXnT$mD`9dsi>;b~r-zo1&j| z;w?%0-`)j8vzaS>h43n-=%J>VUndNEQfV>?KR*U#ZFfw$=i1L?mK}*!=fb!1NnSpzRzQkRGfY1oi*NZ{|sXRCE|%?-3|ZV zAF&aMA06AUUYLn#nnCJmYkdH?VJHkc=Vjb(J@_Iy=|6flw21L^iCFj=rli!U@VO-j!N%$FCCRa|=ihB9J=RW{9gowtiLK zo)~;%zAz?hjFC`KhW6 
zN4NW2pJ5vX%T52rd5k#y?~222mbQC|H^=br))13@^}J4htiQ?n+;!UGT`V0Up5x6s!7rp@P)sqDh|aq17TSP`(GClT=)|+dX38X`9X~ zOfw6<35FLNy7c(&uworVn2f-c@OzPk%r@kV5X)pFtCq^8gclue8~#F5yNPp}k-w?0cPf|-yz49+(#cP1na?IXM$iVx}36-%$wzuSOuakbJ0Cz`<{f3$w{& zpi&5P29%Fk;=4=khWKEL({pTwrXsHb;fhC@k< zdVz`ffaJQmMu$QHy0B_uW(VE;DKPHKZ~tD`1!!KFZW45T=>=uwjd|X$CtmfL*)1^J zFv4NV56Uj#{^laMA^E{__~>H)8)tu3@vM zqwRB-Z4vW5e3aGdjdMrl&oAwSYfcL*GS2g-(-z5vMO^FC&ulX6wUREYzrfRZuy6V8 zIW$$0Ld?C@tV4YElN}Q6=Loym*06`1$1UENj*N_luN8Z}n?G4T`VP%7F~|bVyf)_> z>)5g%f2DVxKPGn#v<>qOi~TaBW4JBeKA@bFZUoKX61I$9k$&cuL$GbfgNc4i+*dS6 zc}ZdA*j?l|G%6wSeV4{Q2lrA#{Ox0>F*`eRSCm^}Z? z9~ij2)!cx&??u|mcci;*90)5*l*w>XQKUF!I(Bgd4%*>Bhxe?dNJ7<>HrZ8zh=MgvocP2!#q|A zMGVKJZwX@odh61|J)HtE9kn$@%Dt-LTR03mW|R6nW<1jC=cPx70EbBCNuZI1y$2k( zS`+<((+qlgyfL!`H@UvO{#dkIAP_UEVoSnRlt?{nqK*tWcE{EJL+RnSN%|KwH`mmEVcxL9n&u_xCZZ8yesgu(5*5k`Z>zKD3 z3BRQ_`LWt|$PFAsb!*m@3~{0w>LJcMZf8~*_4mgNIgw2@!LOD{UygR|Ke|6F*W?2Z z0_8l%$(BE~KgI7?6gqgK3Kv5+He*;^=G?3?3$osEMHt)%p&_BvsPpUIOBT@S5=-%F zh5gD8#Sk9E9lR@r`Rm(fUN;epqZBo*uP88&6u8y|Zx9cCzLHkz_z{4k*FB7;vf$H= zmphMnQn(yo#A4ie@B93pa?36p8}mq+A7A#0Sl4CriQg7D2|aWwuBq7$rygD9PmNo@ zP1@hND|AKRw*;fvt1YlEOu9J0)J}j{pZywAh<;etCe@c=v)@q(T5I7O^`}Z1q*`p3 zRugjm24rlT%}f4iUf(B%LpaOF`nNMbkIOUkTpP!?F1-~1Jv~`JBYHkKVml_btlMu+ zsb08O=I8B%MS9vVJwA*_n=UKOD+krHQ~uL>u9-*sV8QM^$(j0Zp_0SoSAiau;K!%Q zx1X-?Z9PuL_+a8li*AB1E00-wP^hxkoNZfkd0aMx6hE3ssT7vv^3e%d%f)j$?kJ)) z!$M@yv=%~->@_RX;|rwmUopdylq2CMUr3tn2^}_m)-yhj!#+jOAyyd(JBZIK1QG6c zC2Wv|^nx6CE&l9vB=rW^9GwRCxAV!aYHg~Egl? 
z??sEkk1R8i2|SFZp9>3OruQ?}cNVP)zr@KE+X_kzY@c)mVGjjv(@FdwEn8WXI0cTY zONBjBD0!DJM0s#0Ix2!^P2zj21{{fu?cgY63M0x=;cp%-t7Jb-V9cg*vISl+#K|xvU~l zI6AyHC)&=rOjG35fQOltxhVW)GQOyJues~#ImhBA!$ne=`FGgAk5*{VW!Cdtwi_Vz z=y9%lReEt)gXc?8@Zx?~T}#-S`X6u07elGXwL0q0RhfHl^uu(?AU0|D;ISKAQd}vcS7ev=V_8)`M@4P4V$~X@e0n*GFl;9m z`^)KxzkU7LPU?F|lzm&_w4cAx31%58619602c&z#^C|QO{KocS9SDHJ?(5dpuO?GU zcxfXgdF}AfE0Sgo&`E*tNPc93=W!Jkamg*Qd31H+I-s7TZOdUg{cS zFs0rEMZssPS8o{F7D@=>_LHJPc~fxON}bKg=b*XvPL6xS@+q4ilXOkBmL>ca;nV5$8#opvuh1m z%WW~HqHIb{7k=FL=OT-HCHCRxutUFC9s}cSVwR|5xnm@EA(uVoQ0X3;*!cA$1I&Z& z3Z5)SM@Irc!lUvP-ebLg>fejcYK?>i(-uiHw=u`xof9Q94biNjsOpHWi3p0A%bS=ZNB7}iu zUiZRBIZMk36{$bNvuF`P0g{8xrQLQ_thA$b!@BL)`rlThWIu7H-%H?ICEHz;;Lf^J ze()&&)d|9XHTbr3qufq9r6pFVcl~=`3fsJ88%!0xa#T-=Clq7Vh=GBve}u)1E>+u! zPPTe%OW(oU)YCqSvF0SeANUS#JRAkPBlqMwb)wgvOnkb;V(Ds>!UR*b0vb&1mopHT z$y06|*BwJj>#sSernD8m)&bpq^M@`|_jgHJJlR;swD{#uNV4-H;x4MKteKei$rRdr zhmLCgnMU6f4EnG0Ch|qX?=CW3iH{RYDS6^0Q;F;w887Si07@FW$*nWyQ2~DM;H7KDgkp{}z zpyrVYb~n|RfLW~NFWKrtX6-mi&==44A8jHq_4&f77tTlfJN3Ta3+TlgSOKiQQ1QB;3Ty^&kyy{@Yt-nv zH=lS9>s9+6gJ|xB*tiGi)6SHrzX5Az<(66iFPpF_hOYcTBVLIMaylX26T-6yc*6Nn1nP-{P!VpB6$5JK(wntxQQlAa7Hl&zj;`U5!z^n{ic1K zgR0uDi=@^mB=5RGb+kj>q$!Mg#*}KEN&T%oj3}ybZ&gNR=C<^njsd89$p(f~IzkAa zYMk#)J=J*1D2VeI0r_cZZbF6RgS$#qJ2sF?RKtW^K-tb@YW(+;{KYa z54Q5UMO_k}gCACsThfzZ=n(2h_YDx{84QGI)7eq!_D?}JU6DLtB9|Y;ZOxDE~X4nuFe+We%I3c4K}P!H15*j zYExV3#lw_jp6Ao_{t|c!)Zh5IpHxIpAa!P%w~EAASPt8uRhZzUF0# zP|uRez4aU6kyX7i)=aP8d%X9=+tmoZQ}(R$%9sJ$u-J~rX1ksX%LkbSBZKv4FPWNf z=No8b{L%C18w@?&m?*aKZlMKVJSEU(-*2>`EMUoK)fVW+#FUsM)Q=_oj@VV7D$B>d zG_jC6@u+ACDQ+33cIuAXGdVrEvIi~4t&e_Kv42iZhXtj~H3ffCF+$Aj*U5-+p}8>V zLgoJ8Au#_!5Y`1$)3tfSuC|x|U;#^!4?YEjM>^UkW~!JT#A$NqaJZ&c>KO{`=%yKC$+bZy@)70%K|gu_qZ z`>4^={x{+Dl@$)}{iheLMXSl@`+p*t9$OreW?{(>>|68`nsR1Ab&C0TKAad*5WgZ+ z0lQvboO=^~2f45B8F(+dUhsyIGBK-}n=_wv(+JLNp}Pr+*x8z}Z%5vySAMRsmYS3u zoaABlmNS`~8gV&LbKDkR2vKymXQkl^U;7Qi=A!0(d>KXS0bQxx+(F?tDzD?(_M8iKPhJ4y?in$;vm$m#Z5kvIH 
zLQI9(@6Y7(Ckuy)X`PU$=3Rgzzs2~7*%F~Y!Y`vwEzgyI>PKg7kTi4|>zAWrTkjNt zPPSVjt2%0pIm^8vXnKK4LOASY#uul;>dBk2d5mAz#V%t#qxmOB}^f zbGO6W01+u{X|~Pu$p7NmdTI8dcsK1Yo66)lbu`9{z}ior$HuDutfW7-B@GA^dC0}e zCzG+rkYN=oK_@2HbK1vWyH!%lH*^>M;s!&wvP=ganmR<)*x32;@h{?F2C0QU;YTz1 zbCtgyKMYo(*W~{F$a%4vkKy;xpIkH4ouX7n!04PG_Kl=eWPf2)$&ta{qUr3WJXu|; zzpnR@@{vTAEa|{DVeej}PJhFXEhn|RNxXpV4~c#U9VRKjbpcsXM=DDI{|;Gm8|}sl z^?Eu5C{>uDjzD;l(1OczWWQ3^7_c3i99Mm)!XqnqaQFLaH;1$5J4Z*}rm^ks8otMw zI$`3+xBfWk8#T=BX3}YTQ9Wh8k1P!O*ew}eb0}1ZjIgZmP{DbT7ZeD0imU%K)?qKFEmmVGrCXvBLz zcB$n!lAE{0QQus_G4_yNRu+)WKL*Hk4447CoJ*;C4g>wEz@!&)(Z*pbKDA~&+z#Vn zImHrvkCLTHgcoVpGX&+z-kN4hm8V^QgD{`HK$rgDv%VQ69F`<9Co2b*%|)rsG8C=Ge3;@=7CVG$*S5cgl0-?4@P#KF z%AGg%M}^hvO$O1Nf7R;$3Znr;qs_;UCH#k*&EqzZFm@(ba~2Biz6`El#OPQ~Qfq=DMtZLE$e)O=B#XXu}n zE8(8OR-qf@jNw^{ zcIAb86Xg@SKc0A~PN7K^<01Aij*`RE)siHSw)t3bzWd?jb(s$YJ6eLDqa>{rUXqwI zCoI)HO>E6cZSzg<(|sX&$S^Wp|DW6B66Oe4nR^V z%5i~4_|=`#tJ(pANMZ;^E}7{dNf`6F#m7-IOWi_3hu0o)lyL!yQ%RL#a$QXKotp=}Vbb*%SA;`o-KK9O_;g0JytRy0tSOA1?VX z9G|BSSn_aMs!cd$9qzbxxA7s-H7SN$fM9(*BH$ggt{_d0oiXZLZFh#D{u%Fg+=}bZ z2%goOWE(7*2R$4-wRcqKGynx!-G5~|@pAXEGpdpx<6xS%uJ{t>$vFo{#}nd<0cBnE z8JFW3hZ31qLlpxWK8eZlE!i7xkqAlkOoCExAoz8%=3I5HMg|oP~gcn5JvyE;aP-M0@T2iVJ zT9?=cFj@UBWfoPU6FaTkI91$`7PWP{1MN@Fa2>ogL6h4qFs`(G3Od@3nO;5i1fmER zyOz*@g$l>Z{&w*j(t44@)?b4KJ6`5Zx~l)^{Qg!iA(Fr0W9;v_|2!yuIRwJ}uJHDb zO3p}XQfMRZlP#|396 zFaNji1{<$%gMl2YXSBh0@`u@T0Xi{4B=F2Uqa9*Bc-`VGc@iE)(_$G)5whd?zn&%r z#mZ>qN{TT~;ZFEfPk2#88p_)1QOoE?r4V06^W*r1_9xWob0GM~%sLlI7H!9aiI!I( z`}Ji+1v@!hRuh0ar~F>8P7*tC%lJdBhzZPpQAXNxKuGSs;133DEWZmTEe zP=qSxySxwgV_O{6z*_NQy(!H*BGmqM+A-dshICkcP`<v##q)JrFj-A!#> zdLW0^JkQUtaFo#G342deU7l8z%LV;bt^FgHvkTwb6F{$|g&v}`YlI50A4y?fQs=ML zY0;)+yxFL%Yka$cq0VzEd8~H!1;Po$FZVjXh!MlLedeyD!h8AEr$M8U^g%NBk4P-l zJ)AfLAX42ln@9(ggVq`yj5~$5fW!(xWKNau(>)0M{0UosU6E2>pezT|N3iD>C~(W` zN{XftSxfooN%0=TG8UoLi871fYnxw%w4GVLTV)t_c&QF$AVakB)&Ap* zr$NrZ@&D@)|Mj0adwAnF{;o1|F)$!~Mh!0xC`=z&k_NT}@ZFFe`HMx4N>>|pDqGVub 
z&5##Y#s!}d&6Y~9_bF(;qOdE7P`*1$GcLC7=RNq{4bd7fovD6_=c;2_4V2U2@u|3MekNHcv`=1jn_Z(#h z*^>YMUqRTCDM5>ku`ABbR;bIL=MaHPQ!kvCyEi{q#M*s;4`z<#aU|M0;=mRhPuoR0 z7KQ7Pwi;w>ox3y@#27=V#NV*Ac*#_*2ia2tr~ai9Kt?g0{KP!&=Lsv|u=tVmU@3Vq zVh4hW!j*BGJXi#dmzd5Bl57+N>+eqH=L4ZyxPuQ+SY|ckv%A;IA#ch?*^|j>EpB4L&PY&*y~m zmy$C3vc6QJp z2*fatDc_g$>RM;lbHUt5IG-QsJtyoW{60nt`X-Kz%(mqLq z7=hlzEGYm1k)#xMo*lMe5i6|CgZw93r_6pSp@P;#8zE(b;_L0{v%XY@QWR(wEfg4~ zD;cvCf}&{64G7guP3z)$-%j|AO5>xX*wghg8b|=Ljb-1S27HtW3Hz-oSE;w0tU=#u z!ifcpW!tWuhw9dR@2})9e$GF6|oMr)-V7vtns-s3p8BCOy=Y4riiDz%U z0?(QdXcxN^3F~Qs;XbJ>QkKGH4>9-efZ8Sl z&P>Ti!%9jN`_0z-9i+-3@a)Ph4`e_}5b2x2-A>O|-SSgekJYEm}F5i0xju?bpwx`anfKMzLRH$`T}cR6gNydmciJxXFCj+*%2! zHavnLk*IzPBoV3oBVP9C`%QqVlTF=+IUx)K3kUl+IV~6CDJpD>hw3VkpVa@O?7HK* zT>p1QMK&4PyR!EvO0q|?$&9R!Ee)&4CS@0i?2#R!WEUYaLS;q@ABwVn@7rnUobUR* z{Bb&b#`E0w{k-ROUGEJK4i}3Wk^<7X>|^X9%LsVxce-4Q4#6*VUUchx#M{D{KZn6L z5avo#A@wW_FiKi(CvmlV2)|=a@f~-$@%Ot=wI?l#-Dx|f4*|USNNCxT2#3FBx&mdn zJgW&!q$Pw2WL%I7l<9XmV* zCy7KYj7pt{wzQegdlNW5J`bN4%&CmP@?F&Ij|X8!0U+m4H?^=OoL@w; zl2X+l8;0+YUOFhcKdQ*fU-e3-Qc1Mf9MoS3y#_<4rWiw=iRVifL>dYd{VZo8^W>fx zfX7sOKXuf1um@>5M&k=F8JpcE;TygoWd459CEzgPBlz8S-~fenC$Wq{Izf!2!!)%K z_a#k&?T3F!ma9Ae_ca1rgic6N82M?be))@cUlx-1L-mLCWUvKH$sj|M*^&L>sj$aA zm}Hyg3{_@%?Tsd*@BwTRcO;)AmGc1u5^HD9;b>TAND(y&GhLd9UdZ@)x52`ZRRPzn6{0vA% z%}Y&QsRpnXQsI~MBD3_Ed?1WqBWZ=2wdqJhXg;=djaCZ8=pnN+U@(@5 zN+0~gRHqJ6kp4DJ(~P*Giq?-6pU!6BvSP!7ZdBH>!awCX_kjB(>ZaldkC0(^0ok`eWb+-` z=kq{xXOq!6Zxr{;^3@!fLg9}1edcpX>9z8>C8OvnxV-JW#hW z?j(DhY&e-2LJ}IsUGtD)8ilZ-ywM{sHJz7H#`*5VN{Ia=unM$aUyz%hypt72 zoeIe~<7P$$oUE~fYPgYmCmJO)uB;nWH6_5ofxTEsHg^&U4YR9ETvd5I(nNgr^l-{( z;Yo9#tni)Ru0K|0!d6FmP2Uf~?mUt%{&;;06q=X&^9}bw<<$bRP^QVEQOU*q9-)e?4eDQd@&?XX_`t`eRbjjRee@ z=5->BPL;*b13R*O^{f*k@a12X<6+5MeFJ@g=UQcrUqD!FZMFdpG84orzLb+rxA5+e zv2{Vt4Hbu{1`c?^3aei~QsAo~u@t|Xg#HWAup&p=zG>?wk`|z*7rZKtES?9>XgkPS z(!x}zthBQ8tVs9a*0+l(SQ1ese9&Y)rLO65<=`_xM9IY`oJ<1 zn6LSA<5>Db|GQC0eSyvYlGZ5l*!dugqpaM#BNcV$!?NGOsreDRiZB53)#E(}?PnES 
z%##}lVb7jmZ&5sfg;aM77e7#by{`slNGHw;4806Nt9#IZ)B-DD;r$jt?TTRff*jz?a4 zz$|<}O$mxM9n4+zfa$ zi`RD#Dbs^Gf6_Ing*xyiH>b<-I$065!sg-Oa13%-`|zYnXwCq08~F)(j6zJt-AgAi zS3h2!YC}Mt665%8`l;AL96N`EgAD!;?7u?K{ca&5hwOuMro|bMFh_ruf4}UbEodiY$L%S*~S?5axc0V=61IAB`U^C_Dn; zgo(7l{)~Vv<=GdmwMsMgTJ%K9ml+cv-dEzPawdf7@`o_%RGk4bB8>5(;WG2t>5==u zV;Cbsi|l%$c!Bf_1W+6yyXlk3YjbR;xpqH_?P>6LW3mq~T$@yg*`H6r9OnESvK7=m zLKf|DB4!Dq`~XNBDwS@=J8>UWF=t)^thwa?B*BBX4=|gZ4Wt}wSfLYc7dUAEB*CF{ znqea#ArCdgiqY&s0~_|0#D1|Yd8I%{Iq?>tIoLS2cSqx6IheXMQSLqrqwUNSpbkhd zHbZ7U!z)MPRiV0gD8_8>HT*%*bFDZ11_Y14oc#*s=9Z!8r7|h(Zpx=es|hpAXVj6O^w6%6@2ORO3;z6T5ZyjNJCU zSJagBS^6JQVJpbe< zX1E&)qt|R+Km#ZUa)o10mDW7hFFz|T1wrugEd9ZeH^E={;-f0q)ytDqZC<c?j|T=_Z){`wp6upomc97Gwts-MyX-_@6;zHab2O2!(Q=M6Ua0&@KfXUuQ|$ zoCC_kK9r8i@HP|OTm{8z&2&zKC8 zXK7l|^E%lcXV2G)unt}oyP&VJ%YO+KA}S+Gm}lzln3q$LH9I>3h=96*_t7vpnz$yD zu+|AV8ENHlftOyVMCxlpbCk$>&`i3sa9{j}9=ub#6cz{-A6K@#g^Vyd==7K>Z|^&j zDh)G`Y8?eB0it15vH|1|GIF2qkDB5i)ctfR&WzFD>(;m8eMG`lk+>3y10*nDPx0&B z;MV*zU%VoY!pGxG4WVeht~huDhTZWvM0kibkYIWvZK}rF7+^G zf$8%9L`XQIViG*8(yQ$#CFKeA55%uk=&wMo0xQnAQ$nK?x+zMB2=xc+zp!;m;05Is z^!y?Z{@@E7N$~64;qu_OAYrm#OfgG%M~eZgMrTAa(!+ylpK5t|xqM0rC}uN&N^^l< zKY3=EC25(!R08Qc4-B6cm$zD>pl@PQWKMA|<>?VaJboQQuE=78Ren~SO!1dOv-%x_ zaun+OFA-VXlvID}I=Qd^jcgf3Ckt}(joT;ST(B3Ki+d7}z`;O6k8HoI5UAd}{cZg>uc+F@Nq9Dw2LzV)L*oyT3`uT?ydkohVrL zX^kn0;XO#4tMl~Z+n`;h!s}Gu9+515C%;!iT&RP4%2Z0n zwbs_&J}=?;VI0=e9Yh^155BHf+GQn+U4A#2bc^Yp^&NX+;~Z}F%`;yx!&0Euxx3IN z_yyToF_HeDR-QKk>qF`;jb{slm_LhLQ2N+@I)N6uzq5SqNx97*FObp0{_xKrLup6l z@UQG8oFmoi0uP=(dh?lRKkxrsZTy|_pqJnClXf74s3l@FS9hnMKxZI3(l6uhWlQ+= zk7~oum}fVn~DDlF=po_wx837YBK!`0RbS z_3H829nCWI8vi+!I-mcRDUmCkgj=Uo`S9bOMfkCd{vYR6#82in3{vSj2_wz_SW$Fu zUFUEeB7bkkuj}4%|4H{?+djNktoXSf{?-T0*M9Bbuebc|A^W-bk?`)gp^Wm2N(4K% z^T)!L9mBfvACLd(6nOvQRka-j*=br_hm8Mx%|EX$n;T^}O*w-2ufzO2fByEzccUx( z(=hxx13w}Ve``)iQWL*LI^O<$n>S_jSQh-V<=|s-G2(0ylDdq3N-B|J!?heShjv^63j>Rdbb5v!(yo z=+wt#MNfad^!Jh8ag?$XVd0X<&j0?SNRr?3Kez0q{0K<6=N*OmQl~&|qk68wKk&^6cO9I(T1XhXZnSZmDCg 
zbm>3dUKtP8o<>q8W?9=erv?Slk|e8{LnYro@S5q z{J5s?1C~pvDj+%fIXrJi?AY092kbunKTPxX3HZxl*>YkBlNcoBsGC&r;vK1WU2}Yl z3{9bI-6u!=H`*1#im)+}0rvRr{MgHS|1bzUg6{yo>t~6c)a7%kD z512^s1?s(L9#*NBA9YaM2gXrLM(79p`NyJn5#l;r|HtO-NT+|C)YNf6{(q9_>tjbI z{H7nv_Ho(qh{8X&E%@(~{qv8=;5YE8!u`a*^$;`u^Il|HD2fzzepU>t!1>qNBdj=4 zSn%UN6X_2Wztq>0<=SJ9RWm>-W%1qQ#*k$l{G{HnOnp4e|JlYKW&DF{{qMGeN|{U8 zng73MD3_5dC2yaR{r=04LN`Ex(&XHT;J^Cm@^7i5(2&v`k5UQykLL*Xf&o2Ril;Ah zAE7Ba0OURAX#Woae zYC&++0@h8;^WeF69L!#~bzmku0y(cU>*W6Ra$SvuhLQqtUXk(43c zE0W^hk{B+%N9u1sTueANx-HcAi02O7Gocd~Zvc<_xFlp+^tH z2>_g_F3!B!M|`6s$^hq?Hb zUT<5C-+Qp#aM48f9GdU>cVo$X6ifAQuSXmDXUJzu4wfM4oZCvG@bxe_t^8 z#9v2u#|MG_3>;&=qop0ZXJ`MjclmuykR$y{61qOZ%6Y8v6=D5)_kRpXf3N%YI`$$f zc>YTJvKVpze_Yu1vj6^~KmYi{>$$t30Cfe+{M(L=t0ur&1F5UOZ`(gVN`?yFUD724 z=f6t|*CFIzEdP#xaOd9qwf{$=;n1B5aO(Ya>&OYnd}B3t;s`&9r}ncJ@aLtY>-t;# zmHG*}JCEg9USv4B9B>_?{$JNp?MOn|=`m#D)Usn4(Dk}2VlV%zbnR!Q>8~|D0?W|t zA{77Eccx0>4{-fs_5R)QQA%aOO;VBSzsY!B=U<%pOjdKtoeTG4DR$g{7bQ9Z`>s3} z`{`r4+HoE3{Ezo9Lx4J^*N&D~@EM-@7vKNKjs8A+zrUXgEU}mGaieGP=Ju&cB(oEBAl?{_2DN za~vt3Qq~4c|LLZxsj;p=k@i2B4^S6`Ti+X;^xGsxu%|ly<1PRCnE@>w2pq-}FMgw) zefmFN@-LU;&xa}k@$UmYe;y3D*3<>u{1gAn_WRE%z`HLFjW6qClRE!@s|kL%=V7o{ zAtzA(e?LpG2uDKwmj1q8f9>*b^e5Vk@0uct7Rb7gKzOJ18=S}ElW`yo_%ntY(W3dn^U(3Cq`okb7u zicx64MOMJ24r4&oE2yl0x{tGW>GC(=^9HE$*NWVQ`tT^f?~1AffO{JJuyE>APh3x% z!kbHGc*Q-pF`W1Q@}~j}^w0z0-o&fpDeh(XM4n8+^6EGMb@Q(^UqCS6fzOA-y8Q?o zivblEc{Pa-hUR;YLW$EKjiAz5P+M5a4Vw~|Koo;lrA(c^PM~M~9W)LZve~rL+j+Ln za|Cm$`oB4(pX-quA5FID;ehIl^2qV66%7$gEsQt3kr8)>>oX8SMxiquiOY-BAlSWR zri-9+8;HnEfEpA!3%o@_?))NIb{}(Q(GYr^0$@Hk8Q5jSa`C|uxEq-J@2>w`+6j8Z z4#h=)nF@TmG3zu6Z4^Sh!CRlF#%Sn|&t#{|J~XYZr*~^7qXK!dB0wnJfWeKdXDlBn zMls$;=+4c60AIq29yG+2_v`03!d;B3E!Rv2<%Z0WG%u%t0K{F{aw7l>AVEFcZ83o{ zGTdsIO{~fOdU9A!%@yei3t3*KW)ryj9nAh0$OCKbk!gfMzDEN^0n{&4@XgY4# z(u)tW=0N+!N3=~mGf-r@HT?uB(FQ%czE`o644O=YZ?t%NZ7$8^WTjm>{%uBA&yd6U z-W<@G-T9{y{no@g_aoJaH!7_?mS0P;J9kM<<%6)*t1lg_VLas0XhP_b=&sWI=o+~I zFWCEw0|1-c?#UxWRWN>w6hXUrKU`v+qyV)0gFva9j}WjU7~|VExntJSpJW>!0fKIC 
z%KH&C@?>vHlz|?MLMVcXJn|cFK$ENiFj9uaj@G~J)K53JeyWc5*HP&5MlSf++F%N} zJ(Ih;-svJ#3{o?-9ihWAjN==PC0fA2$w4F!O-)=1`RK!}7#(5v>8{r6FJVj~6-~Z3 zj7C+7YwQ_Df)bb`+Cn|k0RYN72(5a`^hcpvQJ~tqn6GtLiv}khv^nwxgpN=ARTMnO zc{;EERXgRG)RTDbjXq7<4pki!DTk4yLF;YfoEoW%F)D1EAlXGM#a(1#_sodyTIJh| z@1=*Kj}=?;C=0Y-r9MC1+B^%rlL)D)@`EvNvM-vzGU$52P>UC@B{)ND?yG!mnfUr6 z%}EW!9}xX@@4tOB)GK2VE`~!Q45hu(O4w*H%Y-z$@>f0eqokuvS*{y^pUV@})!7pz z2SP0_6UCt)w?c(ze$;wkmAObIKoO5?B3pU|o@XL2d+vj)4|ND+H$)r@E}Fy+T2=oG zgKq)_te-oSYKgXjrlSvPXG*E(u@UdMCTSfkRo>Js4s$wzhX~AY@1~g zt}LGHk2miptAu0R&>GDaHuj0q1(e_eeKuBmn}(swY1DU0u}`;xh4WWnHLs0D_!g`;hIEii2PD&j$&)y9cjYRmlr{b)uz<^V{7$3E-~<23Uzb8HuZPd@-tfX zS3tEK+xkJ29iHCpj~>bq$BfW4$}oY}$1rA7r(M635S?`Cmv*Xf>~~?KX`3S6ACZg( zbWih&z{DuF!Dx;H$SM_q;&u=qM%`sptJNtzA?8*DSIIirk*j(xKwdOa- zYy^ZD^(H3h`2NZ1*6L6$afFNg9X8lnV^^sE`5Pe=4Lbx4(Bqfw!MdW!?GLV_Opk)2 zO*r{I`)xHXp<1o_dz)Xg1YqWn?hCz}n+Opaa7Pk@c(nlh%IQfmm|yh;plj0F?O4V1 zFH@eU(my>vf4sji@{Elmlj%)~C*2hf!I_MvUMJp%>^i7+5N6$VRVlmc-mQ$(%}9{! zmyF1*K2s?(>DqLcyOvQNH7S>GlnQC&-tn$`qNMUEyYlka^uCIE=Go0_2UN>FwBlBC zd7Y{{4mtyjuQ!7Xg)t_dC&Do&&30;>F@C56{QiO^A*cE;BGw}l*AM_7O}SGER^ z!n036{vG0x2C%2GRg(aQFx-wx0OPbD4H0&Rr|?FE*kE?uwl7%*(!;gNd^B+W;4na@ zu$)})T&T9o9YbSXK!lk|YjSp44fNQ2%vCDUgZcGi&%t(xleQf2H<8B0xC#Dq;PjI~l2%Lntp@7LA~;^dg>frVm5Xy8|nSt1(nuU z$L57s2U36+oi=U{giXS|^v*JsAj5*wf9IOQ@7|71m*_I$3i{PXvnj?2Q)fU}vOcnhF@?$zQ~HtT?Ipf+ifFstep;I5&UY~_0dfDoxe4+-w225QP7Nzvklb`phqMd z13-;5z=RBnig%Yk3D|a}`qjedC;@@GNIY{WVs~0RIrk1wG$M;if&H%MjYcagxoz_0ozOPS%a!#)DJTTn9Edx zS%NhL=_QJ}KaXMNmp!ofZT|gJX{QU<$M3bXOzb3Df1Jy_F}*6y$d|`r{{#XXiRxyuNdAFAn83;QihpVxd=9;>5g*ggz{q9L4>>A-?IYL;{`k9< zna3hm0JAGW?)1vDFz{|^W6%5bYj?lk%}D4E+@t?8F_|z?Gk%o@ayy^s##zuTl_Ga{ zZFlQniwtl9BTP4EQnrB7#nc}VB< z0C$Aym}C~!r?1?qmm+lwJ&cX2Zm>MK&TlGFwJ8Oetp69W=hljSu{uv(w*4`iNX}`r}tdVCj?bUPrR)K&s_uxUA zgp>xQyr)(pgqd`G;9PR0){A{d3{`u`yqM3(dPVmUr`-S~lpyn|koXg)pY1wUAMSM~ zM_?Jl9$>>>F}^|?$}AMpLZJ}co27BD2!syzG@3Q)o*@;2D^JU`n~ZLBHhgwH z!*D3}eY3)a@mL`{U(JISM7WB0Y(0!dcrIcnZ3D!bd0Jr`$hMJ(s4tYoHVKR|H0rEa 
zkuEZOy@*nM9DzrsaaVceqp#N0IEhC2g>^9Jp6qJpe4Bby0*f0~S}&zgM903Mi*nLA zKDP9LsE0ALM6NQy%o^Zw9JK_l>#WP`VDDK;0zZFQFIgG#T6#fJ8qrGOLfx-lZ%37Y z22e&T5uG(l5@LwndkXJ9nVoe3$q9yU*)Rj=$hou86^wHy@a~dnWbtatKWSve>%&CG zCnT_z#GIDMnFepqM9PjHALZv~0hvmGC`A^u(F}&vOGvV8XYd4ZR5df!o!?Ps(haDr zWWJ#klY`gQ(O^eGOsR0AcT7!_XTP1&-B6gEnrtg=KC*Y1(M<0nBi~ zRAcco+ur=yP}+VhUs9F0yv)Jz>RWrOTs=55Sp4e0FvzAn_^RT9=MDqI9xpqT@AJSU z#ZJr;%$v@}>QpU&jy_dPV;iR_SKeD{uN7H2J@YczRI-ii(^k3X0>bb5;zF1hbEOv?yA=qT3>onf5%4h%4GME5}>LG{0u$)BRS2z|F*Q-m2+L} zk4Cw;yRnrEWVEl(S+{cSan zfVEh3?>>NO!7;BMeqJ?ZftUGCtt0UmmD4d74J);a3GZabU7Ks;MC~8bDvn%iqdy+o z==OA~qasjIKF%RSWy<=6TFvFy4oyi8Gg7hfaA7^Z(=08B?p31uzvW?Yi<64&Q zQyqI&aX| z{$G)8tPr-c5^kbuBJ@!()3-83Q4VoPG}$0&vHZi?GtYq9E){!V&wWH%(z!&J{1FcO z!>Uj)7Uf)Yc(2b>9-q{>9ai}P^6Q7WybH3yn8jMWCs6cT;7KNL7G`|6+w!2iHz=12 zmD1Z7CGpK});W=QmEYEjsKL$n#0@SS6#CEHTAEXw3DrK=PTk-_m~}`?uno#H-6>PB z+$6!VgKXyq>hcVUn(X;P#os8TjEjm3SdYL19oposhCy^p<5%F6lB?T+hY^3oNvk^JB-boKe3W?}D`mrAEb-$-QC7yx zR4HKYjC~Laf@(xZHepKU%vpXRu~m|^mymKgeCt|l3%w(Z z`%g0K7JA#&n#Pr$I3ta#1Y_d#X`qlO(Eo%zL{CzYZ0eL~ug>hs3r84OaB%l?r22WH zGN}&bWZ5*=ENVLB{g14lutgIO9+c>G`aH-LILRqxG$tC&y0j=$4RmBnzLCR)DLn=TpLbP5 z%@GUR^FNlkQ}H3pv%i*XPJy1`2|Q-^Hb?z4oxlmTDrLT91jXHYVC_)E8CoY6L5LHC zDt1WP7Zn%pC)ssoZ&N7%S#QuPLyc==SGx*SFAA))y9iPu^bF1h`YGAe z`C;iUUC$x-G|CXNp@>6z=G>dTF*#*-0Z=n*=*n8V9DHh6W<4?Q3HU5~Gq!$slusD| zL7YxcUK-|%*e?_r47WN6qW#dsDl`l3!L;6J{Rk||sE${_co$&y8ib%J>_sEnViZSr zWhA1?pKTx#52qUA;>BA=g}$3c+YzQb3`NPjInN;&n!lhi=vI@mkzM>e1kjdKGb|69 zxoI{s(onTEf2P_PMsj+T$M+3D7=BC4zGosVO!H3WS$v`s!%3vcG@?j#xj8H1b8B^X z#q+B>t>Ux<_BK*whZ{=$1P!46rR7B4IRwn{-$UMdZ}Xv+==-Z*)Zg#~^TEv8$wnzW zbP^*Q%PuRjN({m9!WqX3t}9+8vfpoIA~q@${@K>3M^%W+%-jtO5j$DYt?tYFXP|}( zuxH_VuoGrFMDNN;Ki>ox%OO2VYAG+4^>n6KAvT_MC@7~n01eF~0a$Elvm(BzZ0wAA zwl|Bfp)4$j-c>1gedSsfoNii$rPfj)mMgG!lvPM!b2M%A8NvtUg^r*55;fTY-b4#2Py2uyb)>#f);B%&SHQK54 zwb;95MB+gl&fOKNlWk3Ij%TmH|ETk75nkF_{4@Y^s)O2?%t6LOrLTaYP^ZPIdZinR zQm>x8)uan9=sxxBF)EQmnU>>$WLBQtR_2`xw4@xa1qC`!y4{pA78w$ea&A^3Ut18N 
zvH}z#9;`+H)MZ9Z#%<@`{wT`)Y^h%XYhJyJu+_X8{5`E%3j{CTihVTUuoeq9U=9m+E2qUA~ACL8jU-U+3?ifKCdbP zWyZaYo>0GeHf=n)H6@iA+o@}EKFN)Klv-9Gld0oUG>`reG#X6#B;C%`?i=B7gd99l zDv1-{dwgl4QEB)h>0#DQ`CU|<@%uj*oNx}l_zY^2k7Nw)epn3{Sb)P!f=h+_YmNYq zUCoQLU%+{&pI2!ZQ^9{bPFYyoOp8Sl^ahE+1G6n?2vcYU_Jp%QDQTI5P^> zG*S!OQxO_Y$y~(WMS*`Cc+%C2SLS}D$Lkoj49CK2NNUt7Zna5_i)>Q)DdoZxc8;O0 z1NgufRLeY17RYj_?uvc~w-F~!y6~5i6Hpzdsny*?T1QN-V-l_xU_6Qc$HNf)w>d*K zL6PxMu#GaC)*!6q(CkYv;4Bdb@89)`JkrgFXh4r;TtP!A*MM-|+Y5``GHKl~0B@k3 z_vK)r@fvNdf>-Q#d(xL;t>oB3y4V+4WH(uoPRhiU27V`h7#W;H+Qahhc}DREt5!5# z=o0Z`kTE+c+H%N~wzdX51ZAH25v4-532&$YaDPx_HxSf5c&_!S?}&P#7DeSAy%)D* zIyOk7+3k?)YFiZJAN>+&kD! zQuhOw(By!YJE+5}PU_yQi7>#NGSql{TTMv$yjzX4YO*AYCFE_VYcCT{avAXwZkH` z25^|{ExyEPpTfu4Ll}JQ*11fqnaS(QgR*xp&g$>rC`#)bH88CZ^iZ2Hp|7DWu47i$ zCOgXPJR(hCU;meC-SiRM?>7@kr0HA=o^j4IJtmHD=aohsQ#e6PUQEw}p4W&^B#T=f zyQD24kD|WxmV!M}|NsS_}eS4!dhxudn zt{hz2hj*u6&ly&sDo>%JIq;;_dTr%!q;|!>Xb&sZvTK|oxkXXFEXaOZv~u_`g{)+g^_+IpHUf_=a=qLbv)*!##E4| zkAxjv)M@1fyYX(d##tIJ0=fO3GuEqnB3QgYRX=1q4_w3bYl5h%cm&$bsdHW2tgI=*+9YS!?FIN zby|#E0@JC%8%t1~UmmWrvH&`TNuiL~N z@2lf|Zw*s)+|%}aDEvy;lV^KPBMi(>tCY0{*+84#?dvFw#ol-01xS@y^lT$@k)y(e zio+p(F64(aMnFCU57px%D*McDU(B5}>RQrTp-Egb&4@b=H0%41!=_9jcsXrXL0}-% zK!p8BMJ-Pq6$-PzGR9y01lpViO~sEyh`C&6QHt(*rw~Ql4*nc9~E`;wDKFa;l`fE}^3wP!MchCAQWvRw_DCqb=`qJ(&d;bX4E*bJPfYfbe8 z$tYn;G|ocl%Yw0FI?|e`nmDEUDfpX3#&SR46zIHc3{^h+(cnJxu{+lMNyJYN4#5M? 
zo1UE4UhlCyfc!$1K!l*+UyX^wc<$po5CUuuiM$D+LY=(JVB7*u_Um0vAaWf+ZY7{Q z4GBEs2gofNFTlhOj^=~%5NJdxzq+q;)#5`yD`+3^1M^f2|8Q*jcUl{Wr|~kLkgJ#6 zj1x_(e6YAsSytatSu5fgdBrZY-_Za&1Fx(+cVu_eTTj>_6^g#=jPO0>+rz`E5r)o z@6OGsA0|tQCO>o{^|7KaH2vQ)_;36GSpMx)8Sc4gN2ZO>ED%DF!R1{Qqvt2j5KEP5 zXCAueiQi|?4LCG4$`f`7OT$U+UYyx+{@r9#lP$4Br^pL|j<-ADFjAkAd7xEzRg2ba z$=ZcX8rXE4iHAeEmlpF6jZX)|R5|Cb?@xQ)PhL3yz?VcOwPDmMB_EH-PnO|0VLvPC zUI-LZkscMjYO%@73s5!+gzg6~HBS}d_@^H}yN8hGw2($R1!uB*ceS1ixYVGW%%t2x zF{76QKsVPZR`rq%CZIzwl+ei|gKb05moX?-H|{jrpd}9~f{w^uDS_tkJR)clcISkR zd86^_qrgDr;%X2K)Ne-u6#}!mnm1mNF&Oqw&#eDM>C1d$Hs|NKspR+VTnPRFwoA?G z+7p27QMp}Uky2(+8+xPK{Dv0OBh)g^xqMs9jLXs*&}3(lWRjwkjA;Fmf8esuF?Y$0 zRS_15Q7vFb-mSk{R9r>CK=0JAPIC`>JmUImi$2pxGJ#x8TM$OG483 zqiFU`qXs0c94Z@(5w`qcht7eF1ObvEN!0*lhjTL7@ssi<#;q;XC0>{Vzzl7G$)F4g zGv?4ZUaR##{0KyrqJ?bm)F=`Z9$W;yk<1E7)QW7s0>0tV)CuED(3gJ{r0V#9ay6Fg zy~(&_3fB??i$;c&i8NB=EF1_#65-6@c`+`ag%w+ZD|ZU-ELrY@s!bgc1FJn93tVAT z`-dRyYXG+C0X=A%UGu_e!o^7Gf23V$U9M3o|B8l z0%%R<#pE0%IHEFJT9#JHGq1}%yB9%;`Z!O|T<$5Ztq}XG;w;mT`u6sp4;leN*PRVJ zxQ#U2YIYHtfE4(He2|HwbV!%ylr*I<$2k;FZL2^a#_DB=_0h=50OQGYRQOmtV3Ff> zDP039lOA<+YCKgA&X8*zQkoxnwVNB&E_k2&xT?WueMtm3fLnF}0$!pjYfBeb29I)jKq`@KKz5C@<7B|$s&)rd z#2iJO|Ehb3dxoboR6^7@9Xp9vcAQC9=*g)1r zk!CO6f|E7{l=PH*zQcGAstQ?Ysz-0o)uYj?6|jAwdIthppFYf};v|M90rTk($Lgz0 za>xVP>8}S@JzvW@z7e9^lwtPNhvidkR@y7hP5}Tv?8S9hvgI7Rvh1X1Wc>Jm5)y(+ zTNC*2 ze+fKfE#H1GlGD^DdC#5Oj=AXGop|uVmh+D;1G6af7IHK_qQVjL(v}$8ZqIjnN4xTr zDVSEgh$;YiH~51M7qg5znn&^j|d6=&$e@UZxsNf}0-(zXmpnfYs;kB|hStzSTQ#wV9#E{o9 z_>R26K*zhcCwLzT#By9CkqZendNIjCG|HGFv|y1a!oYSZ*RV3BJ6^>K&82dN2VQ}o zUMccA7;DbO>=p1Wdk34Ajz`!$QStBy`YK|f-XUE01&^}F%l9q_M(pd#AnSB_o5|}X ztggKm`{?o#6mTheWBcq`TaJF~M4`c=7Zv5)9^YkXgI0fag~yy^dy=%nB;DDk!(hEv zH1D&0;>xbPvFmz<=>A8x)EwreKP!*ZtGF{9Dc9vPE-GG)c|L!BvClHcX6Vzal*kf6 zIBcAHqOasAGLj-5Bt=~#=N3U*bO{?tv$lPEMV!fI{ufL)TMgi&Rtg_b&7f-(7b*_b zMd0nl%if7-zY@*GBHR>z;rYZQfNNz#E$IyQD3PfnCN0R8cTSV@z3JvM6jpCB7*Q`e6BS_9?s2z&s0 zvQXSAG!aq4P{bOCFqv&{FJ#@h<5zGSx;kF$vhAXrI$KXy(n?m1_?@Ed@IeD&YcX-= 
zW;LL6s12TdF+L$jn=?0Em?iHY^QDj0LmJoXs2> zR2sjZdq(2nlhyO(@<+?)TR5~!`m{bGVFq>o=Q@=U>q*t`ddyJSnI&TC1xPdnf?JYg zUTr94*7jn(@xtatCExMSoYx!VMcXFeLT8(Cn>PzYoCU>`%*~BN^3*}I>RvPCqXOQ1?!wQ_oc|3z3 zYLlnw{4ek80u}Uf4Ai>g97s(kR5|sc5ynARGxrnq;crK8U(B4mJkUxa-qvfM1cy@; zABiKHl#W)M!N$k!?zr4=RL&Y-S>V-kO+AQDM2>nw~kCRqRfp1n=%95DV$YQM(den5r+&C;QqC;x~zE5cLKkVJz)LuMhF2mvfl+ zEzlpw-N%|oIO5G;koAx&&vwNE<~+JKtR`6R>$#zu@=9PzjP*fy)4r0ewf9?*^=fW( zjv^zSfT=Xq&%3%}!jEqp0f>gsovmOIOC(O!JJADecbf9q zIvf(U$(Ot@?k;Fl@z)9`o$q`CpbpS|ag547J8rhMs!x>hSOf0LL?j;&7@`+xj(U-V z(1QIUd8sCv*OC5h- z2!b;?>J-1)6$)2Lp(5$?CjxZXDGKAJUsb7SlIzN2*YV;Zj!M4f68h>n<(78JrzJpL zQC52D^PCPMgbLBg?+A@TsweoR1>&bzuc*9}y1O|msUzqmg}VBoEndM~K;{qd0>Qet zPpfP5Pw!oN|2j{dUf(&B0ks*wLufQ(Rb5AHdFSfQ_Klb?*8*qpITKQu&2s7xB>&Rj z21vK6SSg23Ray;Sa|^PGgpf^A(5#ki^o7r)>B~zxPHS&prfkH@NJ6>!#VbM7kCmYy zHP1!UUFoExDfkJ!Tynq0l?as#`7%*%_3|YSWbJB&&8c8Az5^W z)cvj1?&f9DFk;yCtJ&5L;)%N`HXhE*8|yvqVUbo?ZzD|_P2&Qx-ry-32{gK<0pfKA z6!g{wwRjOuogAdpHx^uV97tD;DuO=;0f>cPfYPyGHeV;@{R61%;n~GOA1W(f^4JJh zy#K|V$?n_Tq$iP>+I<8YiV7ZSp4UiNV3~%71{1>M2Tvh@=h8STQ6n8m=bOhuYF%Qj~ z>7Fc&ROxM$n`0G3PO`>IjZ5R;O7^vXtrJbO7w-RrdT-U9mX;8x69tEoO*VZh6m3s< zKJ3=XKod4sl%ENC}|&5ANm;z9TSjT~%wq##nn0e_*zYjYbjgS6C+$LQa%&$@C`p}nC1FjXb1zcKb*FbR zewOGtLbyvK4@q9Wotcyv|32x>Xz49rbv1l9%Tbq_mL^Ty&V37^WM2jCiCctPj*sJ% zt$>hrNI>MmI{g-j6h=lSc%NB{q}M%F!xTxRv0dlAfmHlH`^ts8&>1%14X)huce{Q> zGR2l{Ep|NkOR$>_l8!A~1BM7;nV`?6RYCvyHf$qws@WNXaO(bEoVw4rbv)Q^085E8f|`0Tp7g+-uJ!a~FC zy`%dK+c+(!Lb(g^q*~)&Cfp3Je{BD*m+yI zv7?RUMa<)e3)sA-)IC=hwQl)`dnJvMiVZtAsIA?n7nWO=(l5T}_1u8ZsY>LoazHxlL$c*Hp?WNj`8KG`!*qo(#`N zdYx#b1Ym)N0mL5Zi2_ItUx`#dlx`td&@0Oo;O`2uM;o!$ts{p}Wz}6B`)XN;@b&dR zw>e=3>wCw%>mfxMm@5a2+gYqv2V^;LPfxV*w}|zeb9Efu*msJ^^_o>*a6c)R38=F9 z+Y7CHp)PDQHbZj7*(ndCR#r+cxXp9v-l#_YV4ATQ3Ol;qQ8HhHFr;TG>Lm*AD|>WP zb(0#fRDNI}oKMdslZpVQg+tjZh3L~HS$M(kZqGQJqpw$5MyWdWNQ>La?ghJca>CS43X)V7(>= zCGlpF<2U$QB>Knby$shtt1mq*UL^s8llbuI_xjHOD^cGs#OmCB{DRwT_G_lajHR!Q z`WK)u>OS!%^oeH0@U>kDpL!r^rcLggT+~y^%V2`I-R;fCvNusYRbnG7QT94M#(WW? 
zj>;9hRf=nNTlWBV)wQ9 zDCOBvd!@*J_M-^m;jS^h8(32eR0*f3BIy+erq+Y+g(-WExeQMoW{T=}4ebP~CeHksAb zV&ACk2yNBnQ_038-xWi`uQ9tZ!Jv30HImLNU)eGe%j(@$GBEG#N~kAC6O-ScB=n)? z5HpnHa53g^F)2KX8kr+F76I2WZA~^N!W)cC2U%5G&-B4*rNXT~een3|bo%L!M_ycs zL^xntUke_57vn)bR#w@f_VOUnYnf$MWcZ`Wkr##S+Fo-ronoj|JXhPl&~fLYP0d={ zAQkQ@#3b;e@a&G8N~U3LVkarKb9|i?bw&Ie)f`bUwtacO=HxtJCjF)hjlJ(OwMzvC zM{KfaamOD|Y2Ju)!9TVb2@_(+iOA=}=?Un=G8T11r|4zt9!=a*-P0^5@t!+Y&;*@S>`3^vF^p?@Lg#$ z!zOXUTNq1jETuG43a1t2y14~BLqZ-0whvwap=hY{$Ds&3a8OZ5JTpB`qf6oF#?6W@C*vBnW|b20rv2J-DI6b} zrVggb(zqCZiG71`vgWtAj{`7f&lBefRoR)=$*e^D19LxX0X*0(50139L>aF(t+bSn zR(~oCd%`%L=Q$W0ERo}&MW4p=e~f*1JlE~penc4!TZoio6tc5QMP-YSEi;>p%t9$8 zY1li3?7g?F%(C|?KDO-a-+8&~zI8v}@ALfr@sjww$8}xjIL_ld&XS@Jhui8Oj0)Ce zQ*#944w@ce^w62Q(G@zB3z2p^Rx-rYO3HYBjkR`k0X`8LeKim>8fxI zt>Xp5E;)wlCw96doz@0n@ap)_M}#w;xR?W`FG)w{6M_?&W~k?UkE>+4Z`BmP`u14w zrQXq_ca}Vz@=nBNOI>U_sulJ{B`B@=E<@*Sv9z$M57x)u26FXT8aG4MOY*Vs;m)@E z_9?G3BF4jn*Hf&$rW{lOW5tMR%id$Fx`;X1N^;u2(HD($!+t zz_TCa#vw%#jt&n)$Rf|YH09~s;&Ge`Hv;yOPi(2Q9wjlwTzLLK>}PG(qjNXYxglOq zP>psFel|8BCr%u`bWqOOB~5ZiC>~!)C3GcE%&OTrC*dr$CQ-BXbnn+tgB=E|Z{IYk zFVq`0ysG7VR{MtY?V+j$RxJD%jOUxW?C#(0zK+sTw(D+}9UmX@(jWq}aOHMQr-;|R zA02IwN0f=02QTllBq zK`jr|lOeh@b@MtSjWoB^iSDNki6aL|H^|&Y`@A1nC-3lw2#M#Byy~Xx(!N-!B( zO|SA$x6k{&P}wmI zo}()VG3PeIJ@x3B@XmJ9Em(!6MfUU%RH?6xrZvwWq+B_A?gIa9n=qmsP=V$wy%B5P zQ}*DSR;_gfXH9yIE%p-Q+w{i`=gZaUIWFW?k5or)pVs$@9w)S$1KyeYc1|{9=HUP|sfNYneMisxj5D9u(xKcq zbUIUW&U27&)NJAP;nH{Yqn|^nuPeC>1!ZeoAH~pOmHND${G_-FT#rb{c&$@Z$RJEGehcl^_cgE=La(S z>_U&)m^xBkD0m*S!!R@jMH@}eZeD)YM_3o>n_d-jn$E?&hk2JuCh#lk=KAbiu^c9Z(77TJn_^wa$&J1cP;t0cg@GVVvRh%D2~8c z5F@!Oj8%ClCWX6_dwCon6zz|^fwmkgHSu7hg+LrGPUYVri+zEyDo#N>TEZ{RCPIf9udt$ zBde4&Kj8J_!T_ZR`ns+rb^lA&;u%f^l&hj0v>G@)XJL%iiHxH0@<+O_IT?1}#%Zjn z-(q)B!#H*LUrdt22I&uNcbLT8axCicmn;P4?p223)3o+ocVZHnj{A0b44TITAXaIx z*@y()*FH4cljv*u*wq9ct*M=P--r4MXJrg84(VR=+B8=9x?Yj7FfeRr5w5&%Wjl2bQKfQ}xvpw$E8h2X2F3r6fl>>E0uPf+JAYJ{(i& zI%94;;sfm{(TZyP(7<2mG=Hl0Y3s9R*MQ>@E(yhnxRKm@|E#rpif9?1WE2CQ3v@x+ 
z#T%yNj}dUgTnbGyW7H{uHql=@{YyFU38U0?Tc*igFdFp^&d+U3x6mu`}YO0!dmB){El0aYUD9$}Jl{Ykh z0@wUgz1C&1g9lOG7)`AWU65aHToxc7fZ|kA59P(^(MLT%6z)Oo`$F+!>$kj_i!TOb zsG>r$H^Gm-C;Z*fIIu|WvGhZvmWtB{XimBxVbn3$bLTQQ0BgATQ7Z9=^m&LVI;sM$ z?LJeK6lvQ0?(cmW!O4*AEdUyP?3o25fL)Xj!-8t`8(>t`eEk=L^f&7vcF}dky24 zpV%CrG<4fl=HyT|hm@o6O-@F5&t^nGlZdo}kmWJ7IkBULNUV3GAS@ACr9r!Bf^;UH z?9)n?^q&IxxA+gSFPd2oq!=*~`68u}nKPvTaa+L{hpeZqW>%?x1!!*d+fA(067kp9z*5N|?bFt20va$}wr8>J|4}Tg z#M5_;572f2_VsrV;4atP!Diqn}j(hfZ;o0!icFNr;cF z(_aO9C(&>TLX66v3a|07`9W$S4FX^PIt8v0hS*!c5|NPV(4Kn?qih7;O01Ux=z#G5 z>_ELsv#@r5g*MS1$?X8n9le<)(o_56=>vrm z*vTehGZIQl+@2P|a@s$z(KY9V_jcOewvVsZjPysWnoRL-lfro&(eLs8Gg*8i%>pQ~ zs}(#vx3e|4Ai9No1+}2Ni_KlKydfr(bfqa^ui@(!FJ$NLs}W- zYe>ai96uAB+DhgWagcqFu@qm(6we9Gd*M(sv5^|{W}+7?0E6)<#59HKrQqbpv`P}G z%1pmQsFU;S(+Ja~^lM`o+0QW!seI))q&TzAh+<@znurD-I+r53@|77$8pOW5Z3=V6 zGKnpY-s%!9>Vx4QqfW3U@9lanA!zm31uLDX60Kqw9|3fD8%j zDQrC|Y@my44Q;=AcLVkI_huT+mrzi028K%J7JH=AI4IB-QoV`YY=~h@&V>$IWX{hp zwvz=em-Lwod?hNw*l5DEScSzGmoQ)|Y2xeO5YHQDI?TQoVJ8{Cf|Fn|Id{J>LFV>v z#p1tT!K6$SMAOu5A+YqHPy-AF9Fj8cEI>yKhN=vTr`okLNz?vlgTot!TKDG{SVx5q zVZ-3`_2Bvd)-Q$k=a}RBY~61@>5BXr#r4^mgRRnE?U?-7eFfog7L9M#F%?jxaE$*@ zP_-zOVo}X{Obwc>xtvv=hgR0hAT$*ErqtRwDRPZKkNTshXikQMdcE=~XNyYPZYdv@ zn^N7J6bU;h+%RI@1a+V+3>shBn|s!LDCSVJ;W<>=K*MojPR&ffNxi?Z0(aS65^ksm zkfhvOHk$}kHjJ;Q*NTKIn~GQU{+qkuD{{#r`FYphYZN;? z`wY!Sh%D%zaZB2WmA@#Pk?kO@tvs5-!P@gt<-p_DfFi4CHZ0E#UP6*e??S#Mu^;~8 z1

4enRlAx1Qi?AlAb>-}@;j+4kY0$e;mn^PMfArxKn+0DSGK zi}jSv2E~^QmbWE^TF_lA5Mhm!|Ljjs89;&9+kL@aQb4FG?%9=OaL1&=Us)}lf^^2<>CqE(z>Er7NdvhyQ4DceqDqV18OmG` zG!m4$9NhnQnT{9wt8&#hfF#qKOOt(hV>tU*I~51ili4RV+mcZ%sORf>zs_zjyL;$C^*rrANktbNb7wuUcvV(-6>*4?ARypf zz3?#VAt=-|>T?*2UhFL4dGe-Ck$q5|X!xE~6RCE-uqK;1BHAF?tB;*aKHv+@5vj0% z`amyIiHmX*?J$;KoHGU$EGhToYcQ($_^}0C#kG-Q$z{OwRbVfKzx>EyJ`7PWyLpF2 zuw|HaOZ`3lZnmSuLj5>6Es-%YUv7ueyVv0~{39gvcQ~_0wbFf7W(W}b+C%i(VakaP z7U0YY(4-q@nuK7kSvUyB9L?8CZo6CMj{>DeT6!BgRY<deR|krSG+8jzb9kO6!dxYNw8(B;oEAXuA#Y-T+E)(^grw6a}j-167B?3awwbJ zNK?;;w*pQ=Z-kppqR6Xz46`B}^<@ia^2eLU_n=O$Q`d8~maEFt1%Utzd1*8Am&{{ep<* zq1wq**+J(ie=*q~inlD|oXI@NE|(1_G)m)Kped&GU>_txf{&q>`bfDMBgwcH&UEr0 z;pbmb=07q-7-D!Beu#eCc(O4tAxq)Qnu&086_9=oOvE3bGfA?kN8CoR=xcnV8!Paaz;_Q8zt)mLP=L$t6!`fWomShJN+L&2jI) z;lFIDu-m;D&IIXrcx)%bj8xp=bGzf?=$&~ZFePw#Y#MO$a+mWIhCtbj0E#;<4$y~C zfm9(-f4-zRK??}E40~Adx}yoiUuJpgA<3ekW{J9bArHc%$6zm5jsst77@}qG0vtjFhKFcfQNXGWNbS^-8PfLcx)5IgEU|6tLWnCq*`czz-SoFioNG*cIy!b%-cnB zdNF*Z5Ewn)+!&4aW)CSiN0kfIq*tX&Frz07Rssua5N5el>_X)Wfu)ZpVnbg}KdVO~ zPsg-b7m$}n+)`WLa=q^4+yOOK)0=>LTslFtI0LH*JWLBbufsOMz)KD8mCn8h_qczh zR#>;v>j${3%wQ8#r+sgaCg%-94e*9i>{)}6qJ|c&i$uo+ZSR(E*D4GYfi<*;gOkCX ziDq)Rvx}DfBhdA?_x>&Y_(cc&T)V5`cAt*HP>n255z>eJ0?f`=om zg)%%}#jw6!@zllr>~#uhnZP*0gO8C{#5MNEhEZ2HV$RvgltT3K2{iti3wj!;zB!0y zy&}{Gohz6UrHqG|&<;R~K6M%QQ~?@=dmN_6ot!E~IJ_zGB49k>H-;dMI|>HElqo05 zCNXvg&2C0*epgzuK8~tUmL6pTrub+E7$Tmk}3cj99L2vAqYPo!(@ZQ`-fFCj_ z_#aA!%d77Aj@=GR5gI+khj(?cZ^SR_fq%avyxU?$$Tn|An!i~MZlvM@1c75kMJ zh%QfJqCbDb&-D1HO>YXLJS8@@F*BB!G@cQSngl6L4Ji|LK^SG$IT@e9Z)krNT1zBwj_7%^TvG~{s2s@p)w@)P41ad4wkr-bSZch$s1 zI~jDm|45Sl+K+!J=Z0V2C#R=6S(T`+unj~LCYA!TJ{Wj)%TKG(&FYPVMXL-yTSmVZ zUhGHH$NiFg!Lq3e5hg8Ew>eafocx3UD16=oDz~}29Im75y1>?Z^|B1iMHu`0{2qMz zBrPCy^?ouNVzB_xO#|-sifnsC;6#mK-?6e_ceuxE!#XrtK^l@RXBxt7KkP|4#z)uz z6MbYZ=e@^n7G9KgR#KD|FhF=k?x@Uil$c?_-xhq?ARK-X)w|0 zL7e5#%WKeZ?MbP_Apjv)SpysZwfcm5Y5gn#5#o40<%QQ1t}SQu>?cyJ+%(LpT!AdL zg=8y@l;o}wSQ0h6<{pmU5A%;S;IC9@_vt{M+qRU>a9Nli(SEN~&>YdtsH}Wki%6U& 
z`5vF+yLaG?aP#XNP+!P2>1u)6kQ>vxCra|wR`C}DSrj;dkN$wbI{j+Z6)LVFHkPaQ znXqAGuFy}pMoWbB8#mrIiI2I$Qr?5gVjv*jo0h}qs2ZyOMxp{Hymh4swo=amWTGPx zTB@~%S{0mvO{j%j+=wD6@{FmCu7Vkaiso5boE)Y9aWMa-fh>xPj-`J{jvb^!_1`Cw z0hD^iw(;r2_N!ys>|Bt6uVDbgIImN-k8i3Z37A87px%;P*q?PE4wfrRRS~1dnrBk! zZOmFW)d#&YfoBrI>A9a_I1i#JysF+Ig#wQb946=^mCAl&dMDmlC!Q;Q+9gPu8J zk$#VHxqA*Pc@G!t_e#lY!?OkXFJ|Qcdn(rC_>~uEP&X&X3MpxwSisbN>|B)3NKZ^l z;FS~Y13Ib!hejNU_020H4FECaAwh%X^Gf>PAOxD>;cylA;RN};S}`5SW$XP)g^Wrd zhOI!&KNMJpn4+me5l`aax@GLd`NhW3Q6*D9GJH zvI`P62s_FHYdJNrsz9CsjdRZ=DjB{@Nt=o4AC$A~aijC&%ZK;$1gPWbpH_?~0)haJ z`woJNfpVYzkbBNLjU@y#7Je_@uP9LrHJ~=Mg;N=YrRzVa+%IjU#`US|v7}=LK`VkD z(*xeB;4QxdK6Y)d3>#f9YRj>LwxTD{W^yV&Iz8Q?gQ~DQO^K!1HCx$>c@Eh21ZV

po|(t35@ zsoy?kQtRP4b8C|l24+vxJDI)SK_oWRS|jSEZI*w247yzs9GcnBeOmc77GYW*>(EFRM6kWfE;9KxGB;B9;kj8T24QKV@N%RsUno-n-`HB-7LF;WWqzYZ+0 z2__mF%^?>0a;C=xaXKORz6hexKDnWY*1o;9aj-71QJC*F9~!tg3>p7Xyo0FQNPc1v zHa_){i$8}7oUQ8Hy~lN)P#Mk=#*1FGs+aTl5xtsq-EN$P$?r3p@d;{|(?R$E&$iqNQdloL0Sq3+UF=*(neBbja z?c1haWR>sSiu8N_yFwc#ONQk;SsR41xVWU)s2Bcp1=5Il|Bcx9NxuDPPj!g%0*b74 z-c?0Uzr+zlGeBequn-e}&!PDbQ`tm|@y-*y>^y#;A`%K^Xq3$lh~}O`WX}5n$jZWy z;|=HZS`Z>_;x5BbbIBtLn4U}k`JRFqW(%R71wz}At04Pu$Uf5*Y6S->?NCz-yYmRO zFYou5StPk#1SVh@$S*H?#5e9TnMXGQ*G@4AJQm5!X>qQhl21p3#9S-3fiF%bi$;Eb zxqMK3f7786hT5x}p3EM?c(oY>3~7&;-BvoBslaL~bWYMey}NIieqd$DSA{|BjDzVF zoM+?rAYH?ESQdi7^t~(6E)}0bfh$tju(MRW&wckHqx`dd9$JqjH8Q7MlgsI_rxeU@ z1ArDi#nt&8R*7Tis~OZQ-961yHc)K*ozV;cMfUTpkW7zXe8iNAWHJ$bZWw^Q=K;`D zbVeA-Y{FH1XRiXO-LSuo;utk8()6i->(e@td!Fn(KdTDCl#Hz$Ok%MVb*mD=##+nA zM9}oNj&pZbAg#hI|DGL&v@3oGxXmyr*>rC*QvYpP7kBAL8#NB z>?||gVqPbrUA-0!E1`Sva~KtS2>U+e9aLmG) zn`{#S%+Q=!v!Pi6k38_~<)zvAuDAyjFF!$f0q3R2%N^(%5L8LJ>vO@!h+*ZReXXzN z%ac9c_k}CWETLaad{iL7VIcb4IpWnz&{X8k8!bB`5mzkkDJ3QiEy(Uhf@YW=PuiYp z>S`}uDKdVj>HaZYtp9~u|9>Bp5JgBVDqZGgudY5HWpn-h{$^gUw7aBIB2zVl_0XbD zhmtcSl>GNrQsTHNCequdnnQGMi1FZS{DFr=m$^hA)}DO&7}cBU50!m8-b!SzOp^=~ zVSrMp5jdZ~3;X||u-YpbAD-H$NOPN@Ln*x)lm?MLqD zsH;zG$g>%MESBl9E9p>KzF5Fym3&nct+@bQ@eK~861w>x4VtF&N!PaUGV;gJl6@9pv}_rS5n5u+xhXl&ecutlxZRJBNd*?5IhRw zgR9iDLTxvjw4i1VAo%s6l47VL;8GM-_@90B<0a;J#QwBq>zWE{uehr(i|>s}#oP(h z!|Of1`dDg|G2%h%fuiMMVk;!gk^~!htd{1)#P_qg^jT;oeRRC~LkOzwslMmAc*Bct zlfk{3WW1vTW}r;RD>B8$h@u#h&}Q;3YuFTKj7sZ5FT^| zRCyTcF@#C;A00ru^uz3-6C59!%wWLQ-ho39%@rKO3jN=QM*K2#qEbYn$7^ge*TOGf zco%E$OTixol@RZ61({!(?A=ufd$<^@!H^!xTbwgxuZqdbPEbd}y z^BU*3izIgP`S%PGH4E*dTm2Z^L`kqbFmHye2(WO7MECvr(n1u4kBvv~wr}qj9BntM zQ-A%)jS0;|C|7jl@E5v2Yc^URxZ3i#0FVAh>b`QDohv`*m188y^**`r5*6{5iHt^M z9Jswd39DnDsT*$@wa($R-~WtS>J**oNZ$I%Q1sZ<3VZJ@z|FXu5hQ7sUOd~+Z_-w) zmTltiU-GDn#pVjVvfEJpZQ_^2f8Q_qx?R?uq^srpt2v-+w9&yqds3bYJKv7n1Jjas0v8w8I4k;JFHhSUKvk)$+P}O zl7EEq^6z&KmrL2_3IFRR{Kl2?YNz{U4Xop(F)q8 
z0R&M+&1ZHO%07YS`J(}j0&)j@L{rfF7^7_o^v_|G3OGh{V9yrxYoIQpGWz$96m>Yv zY$c0jYrH*A*N8UNaSENg|GI5PM$sP{5)wAyBiMfbIwACKqCaqhPVRk#-QW0qavWLV zPa_5{Jn{MaV95(&Ymoo3wa}*a;3zw^cNa#P;SmX|@Xc~(iqXH`H~OzT!uw~N|FP2L zlsZpf=RbinX&m(xXYW-c72Qn#+!pYe|GxQdN_q$OjcN@n_{$sYWvY7 yWYuVf#$ z*q;5fdjR}UtQ0r+_kG?IceDOqKSf^=42s!g3;(Z`%cG1s^IsQn@5_I66&t77V8ZYV z+yB?E|MP}Lt#I)o2xp5841a%|n+n$C|G6UY%Vni-$t}Wibt2p#(*Lik%4-Zk$tDN7 z48KD4KwUju*E3nOFvo#Wa^Z&CJ7n~_iD9*bQkI_oy)E5Dov^d6{IPaKcpu8(b1XoU zJ{wgy&i3}@+w7m9=#t>%+pm|lRE4Io(d$u{k5ns{$F z)zRb(*g_IGH`z!ap{MbhcATE^eY^Ya@)_Vz&Gr#q^AM%S4I=yRujB*8F@B3MySagW zV4A$~p8-{62@yP#zjg-Ec`V#`)?9!+;^_^4WRnQs@foh*xBy5q3!Pp%v;lBONrf#A!Eq3jWFHcvCg=O5~1}U(KL|#z5csS>e)W?y~lnZ)*x;GoBEh@T(kTd*q0+P1@`Hs@Mab40iqHBHf3;c2I^n zf0jlD4F{X-zwR=f6)4hF{B@#4gNfv0a{-?kpkUV+pK`%81nU8><#noS3l+TLq11iF zYiy!F!Um(4p8m6Ib{`^tpUB?R=4Rv$%A>6|G}P#)F|1`(#5b<$EPVgm0qTQ1Y6Yel z5U=JZ-e5zU$XyiOtXFUC>0JwRPt$GwDIkpD%UOT`07CkuRbBvHYZ-BWQm*58ZS>-Ntz--1EB$%TP*`j6jq2Xu0Lo16` zD4#uQE*&tsvx%ROtT`JQfx$9$O!wN4;ZNa%9uVp=9e>gU4a_Y_H~6vL1$S5(90|A& zm*UVDa8J9J{O8LbuIwl_3Kiu;6D|`E5j$Vbf7`Gh3r7L}1r_%hCI1)AXC2tT$+k>H zJ?{Atdh^N`4Oou_N%-WV0?k7dzdyOw66>1pKfB@Acip5PZ>Q_72(EtNR{HA*)jko; zd4yT2?SLY^FwNCCx!wJBoB~YUQ_CY)>m9KU>_&!vY_GpuCi>BLPq*!iOJoGIdSeSC zXU^Z){p%!rq~92OUEXL0J5&%20W$}B*I-snslhEbV+f|H$mtzYpsF0Z$HOb*2K{y{ zzs`l|MLgz>MnJT6EOJvc3&%FImOkdeo9UfwPZaUBt9nk3q({2pSc~K?_vkc+@UDO< z=>;aror09kmGosKD5+lLke;lb%f51};v3q|EiqCvz;?`l0%N;=VA{Pt(s@EelKjck z!mLE(UCsY++eJM?|CuW=AY-lLzLLwpBDwovugglSvRw9rYE2acSO#E*Gu`(eRKqld zo5OE%C>P3v@>SJIz85W}gk=5gB2Ajr@Rbd)J(8mv?WuzrTYTfI4+n}IW7j~KncsNJ z(!C{*QiBu=rw#=k2H!4nOW}B&Eo=B^OZ@uocT~;%2pjBq7Bq3!FDvI-^q-RCHfqX( zI#u5+u$ZUX;@@gI410ED7)S|0rFy1;yV>?-YQZEcItt0q|FQxP$#iHoMCg7B@)ySH z^71;v$r5kR#G(RnCg4+zJvru68dIR_#KuKoKv)2Adh+E*2V~WKmZ4QSeW2J`2+LZW z)lyL2trIX4ec;;JR>v0rTO$oovCmM_g4k{&f{oWmZsnM?v(gCJCw+b|8!m{>rx56T zOaoSWUQ@*&B+`Qo1|pqg3cAyJUU6HvtWP-AaF5qayDbUQx=&Vu_bYyvY`C8mKj zc(yk7!f7sy&?KmYq*1V`bwQ7?9-yWn6NOpeW@Wi?yOa`ceeUL%2t-C3pc6KXP>xRP 
zT+#!MrO^1fEpO-{%kuyl?D)~pWnJRo_l%GWS}VD_()wq;vlcTCJY4W$hFkQ+;^BCsbblPy+TE# z^Bm9512#{LTNC46e;8#(c;5LB14AE*UH2tcCl$1uYQU@OLWfPxGg79WB-Ko5cPnJs zY&HST>H_LVG6*y^XNPyrv-V8S8SPTm+KIC5o)WLliiLL&4dQG4m!v0#71EWG!}99X<;&2U(=->7t{6x ze{Obv=YAeOA1OU~dQV|L8Md4g{>BgL)INyv;&L+&nT^)@rz2taK33aN+IYImB@jmr_ZB;QPnSw;?c$p=> zyI<%r+ow%_Lh&ZVLk=hhJKezAHzy!?5w2pZo}3A&+-*2V(yC!(OYio*$T%iOIa6^F z8&CgC8nlPUw3ma63{ML$iCpSW802(qW=sl-A+4+BGi*3!+L07I!7n^XGh353`<6Mhm&n>km84`qWgtzeY7um!^a^m|1CI^_8@^Ep!(D{%q*18Jj4}Zy3Yw8>|}jgygTK) zpMPHh{z|3LOoDU?C5rJY!Bdn&;#z`%~{Wb@9FbUl;VhEo>7yCMr{tCDJkYq&G>Lzn_yZ9x-*iUS7*gN zM!l7!Z$=H;<<~NaE6IY#S;48(XlC3Xlpk{lKL2O%D{4V-o^{W0@nDORHqkQrc!>LJ z_m{SdN<=}EVwUhulUYn()cJMK0Db0*MV&c}U5cHyU^#o{W7PEp7H2t+R`KV{l$&)8 zab=HAF$vQ#GfVpxw|^Vc4Xq{=dyxzw`_W6AUwU#a=LDT2JT56G-6rl;s;-&$AY1Cv zFgRP6xA;)#M`r57!taN%D6wzhKNp2l&>HbP!=APNBUi+kdg_oBTw%UV0I8+1z7dcu zc4ghRGZ0rIDtPerd3fKgDEgl=mL=OXp^g?{A!LahGLk-M9f-=$TccfWVX{LOP9Jh2 z##U=bi^;b5>L*ygU?#sw>Y-e(LEV36`TIAT>b?^C+~(S*t%(pqr-NA$D^A;q~HJtKhA z^BX8gG1bdOzdQk^pGL&V_I0aNAUoT=uyIi;jA3Jl|F~`X0wCwzBe(_RiR0-*kWrj# z(XDulADiSZPQWeP1+%iarrpQz=;Z@&VJ9o4YKfR*vrx|s7cSe-yB3T(aqa}@M-7q(U(0H5RV{bnK94zKLo!wyB% zuP3<{xOtpb%e62!JN7sH_AfN9Y=gS&5I4N7GGE>=r4JuQZfE&gpdg`-L|(N0PF@*w($Fc4hC~0B@7Fwusqy5DY)=20w#wE}a)Vr!rf5{w)=^VON-d z&4^O{$0XpiQ)S0-qMSv1sML~|Bl)B0`c;9g;Rx*Up}zsdhC?`e4hj+T9jvWx4JgHi zB|TzvF=mGp$3P`Y%qXQMlCj4xeGFngX~BrG#?a7CGi_MRB2Vr*R3ns^t}dd`Bz)e@W{FJ@CDow|!5zPWu9PyQ1Dr#WSvHY}@{oM@~qqq5Bs4gTA!58+PMg%6(JDA)g?!LW!wqLNxi>{`_dV<9}yw0w>Gp{~d)gU&XEq zd23PtjLrn56b%8T>{FN;-xHQS&&gsh$XneqOAG5vy3fKsKR7|=>}1;yoAY@aHH3fp z^QKWv zYDg6F^~7xR-VU-&B8w!uFX?(UKTghp+Y{}@NC*zED^Unl3Y9;~F6Bq$}u@*AmV-F>|{yD|#?ieH2;7f30wgHJp> zJWk0J+D%8Ufw-1}W~MA=o@u{{(m7lml*e^{+$FLhf~#$h)%{w2F*9 zJw4}T8P6#m$cA+NjPQxMN~qv!B;}d3Cn&=K(H&*H`2&7JmM>joW-HEGd7SwtU_6H6 z9%Z@-l_F3*IuX7Al)_5K)kSglSCew(3@Ao}=vzP?7;{R~PC`R&`=upLRrEp?TnCkx zKWstb!?0m-9|$L8reC8yH9?Wu9?I%n4Z+;uLwG)G;Hc8fgiShTx<10mEGM}DHA>-w zo`vM&efgPe+9iGFc5=%g%tb4=+?t$JQ~VHs?AK$!XnuK^hOc3*_{-cA><|Vf&FOI= 
zkxl1!tB_cIMPD*!VXVkO9o@rqB0;?o{9?5|wN! z%|?e7C0CRYi_-H0nzOi8!ps$tTz0B=X_kj!56;@XyT}M_UA) ze9sp3Bp}!D(RM#TaKJ{%QIT;=JTK#;d5_()Wug5H$wkQ}h@`j;ByHMX`}&VH1d~q4 z_(rwst)0SUu_xQd=e%OZ)HMW1%?jCvcMUd^+fWIvXWr0&n@7WU3ETig9Gs4eHtrvte%8l9n>dQYYb5l+J9m|9&Kj zB3~!KSxRHqEZEIX-pMm7G#vr^Ew2Dxe(jvdz3(@e1Vip0zrFvm>i&jD*$sOuN_*bD zyN2ria6!9;HMzx#{`r^aRmYV<`ZHoOktyrV8{kX(|JXbGLP% zDf3>1)4c4swMAXqA_Qrn88KCfZ$Fw1S9rcwg!-I>{$S%vI{c41``eZ5!vUi)?re|M zIjEF$EB*~|N9zL4sgCIDCqqikhvaB%JL zmpS)9`k%(FvGJiK-3j6%740*)Q)itHhz5SCWmeNNYoml>*t~J9`N-4 zfO6k+_r3q4?!$GG=t$c=ZdHQ=Hj2%r;owa>QTY0y5n>URW9GmJy`jdj@_~*wZr?LF zoV}3Fkvj`cSleC;9Y1F)L9^w24@~vu{kh0oG-W7~mNIn$C!)&U8M-G0v{+Tr9%HwV zJ^uLZO;^#y77i!rx{GnyvDX)f#VFa{#<&)UM;#&@U%h;}TPB2OUUL{Sj>YU|zy!MB zwM`eu;TB!r$${e2c1Mb)Q0ELKht_m~MJ)L;O}Oldx&`1pUlM|mzsW|;5#n55V_Hv` z@9tADcJ*SuIlGU|c&beN-QSNTv`32#9opxZ8$W4(m z2Z*L4$6UR0`I}N3!+vtH&KN05^=uQRB33`-kf;c!;W1Z$yr~ZTTEdrlc)F{WX>%<4 z3-pzQ!xz^@KGq~wYYjcz+0Zt-S)s?_?u0L7?HCO?I=ktaloe?9y`5>;*dS$JQdw+` zaI(wvoz*vfXr%NLSEHEWpT(vp!oeEbO{D(%B3g~@XY-StWP0|XH^>inuAxT%an+9ViIW+KsM z=hm&n6WadyZ3b9!^D&nMFVi2yJ*L1y`1s06ln={<3$_d1pPg1G>d!E!cL@#d9Lk44 zIKM0VJ@;2~Au7c*m$ikUH`BDN-zHU!!u{uQ$$s1i%APtB+n@CYueA>=6^^|l`FluU z(L)(1xJmrSsO1hjej5NO7Qj(`9J=k1uOVRhuo0m@sv9dauAyU~I$NZ94$&HgjaAokB>+Gt< zPIl=XxRQW$^VA-KBBSGCtAK`46kaR9)$+jj8S|2{ZVN@NIp%M#FHaTUYW|qw zLBTuwLU1iNoo-anup#K7uLJVFlGn<5A1e7b1Z6xVPua6TzgF)bC=xw(6%M2}ib?1C zu?<2ca?J&2_$>Oy3sh}imBTvD7HAfb_huREIephCn6XLkM{)N|Pxhk0-MXPTog(mh zKU+?O!$WkF-3HhYY4zH--Mronxy#OpD{TYdU`$a_R|c~D_j98$Qi0c&L%L^`)y+o> z_8aJUt8|;v{{+?-MQUi0-^BeDcy-MebU20cDk`pQfN!aIT7=d_6%`DpDmDHNEq+{g z)A~>6t~+8FdO}M~U?bA6ucFrx`2tuPp`=VI+HEGl0_J5w}WC(d>X=7UR^ zfllW|gWDjXo8Sr!J0(lvOyCdra0~9wS=0%~?c>H}(*~013eXL!tK@T95Q%G6fZr3% zKn^uYnH1YOlW!a)P~EWs@ra%`3EhT2F8eUka-u2M)cRa|+$n^ULf$tq+eZm&5ZJ;Px zp1aQeJYuUWT`w8(8+&k#p&2dWk*(X%?;Dq!l>^alY&*Nw`|-gK4j8@qC1+Z7lw5R0 zKdu2ocD2GrRrn5|p*hfg^%+iiz`Q}MZ1}H_$IAnhx+$4#dC$_E$J_Id-DhN`a)+cX zlKfR-}p7|#PCEJr(c!UixP@+?vhT^Oz#c!a57BQ51O?P#d>p)m@1 
z*q)$7s?yNPEfsJs9R$X%U(TEJ~HCY>tt5drxZunGKZ>_>_Y zQS;P>@R^&y#?6H0n95r!b1KjY8rC!WeDjd#B6TG{;7aNMxavpMiN(zg-njfc9PlhQ z@xwc^sp`2+vC_f1Pyn(O><6%R1{x7^d@ruolDW)~#m+|$ZHlSlwvN9%gO0KSqP#3v zk+{1No)`J=i(4z@E?_g(1-Tr?q!;pSgp5dF;!C?awDp;i{i-E2JEgAW@vsl`tU=&y zzP-8DR1z8}Xg4XAKnnoqYgc|D=u-~R1o4g6GawIXl|d~62sXm2Lo?z~D4Kh{HJ?gR zqT}XDEDQ?#yc_WL7i53d>OAUsB{r%YYNHn}KbjDxK%>`hshGjdLpcp~t{c!8#7=c* z#!XnKI6Va3utJW+t&@UyLN{r1N9#r=7@gxXQnD=v2Y8IOX2Ngo6;(Ol(ZH30)HZB+Hk#$QI@N)~uhDQ}-^yXN=T z7kM>o4TRq9?v%g2MCFwMa$*VTKJ$Z-!8gG5-6;%QPlnHnKP%q;qvigsOus@WvvBNO zvZNSMFTcaltm9v;hp(mvw~*mj0b-%*Fx3*{9S&**_6?jhLF>u#?>x4EypEh?FHC7P zKC7IBr_D+<2UIRj1A!KOva)wN6*=a;Jz2)B(o|0%R++HQCZNAX%b~uuFsi8%cM4nv z5rw>RX1)#P$*%-T$xzUirQl!u(F_R3$UFOWB}`+e z(eNPlmIbu1b@Ip=7;FK3H|!;}xTv(wJr5kZ95);r2(4;Pt!H<3%jbJ}5@*T;{&+3O z7XCID8SGJ|S=&HL9MQUtb(g^7xigpS9VoOkN|kj(%bR64o=F}wyh*#OL)Mi8p78>M zkM@VsDBlNS5!g9sEBYb+*FHRYulIMcbfhgDE{m~39r4LU&@qz=Gvn3+ozYb-Mw^Q6!=!~-u$UW5L)m(r%Ry(HPW znfuKItc!)sVq1=DhvnLyhAu!wA>%gh*4%5T>vzLA5|ee2S*XgOVOX~6zuJr3?{jY+C z&%a^=>5ZQwY(7LK=cVj zBnEglY|uix@54Kf`=3`l>JZ*|x_x>pKvA(Qrnk);qxEnPF)Q|h4~qOK*|&gCdM3yt zVI<;Qc?HZM>vU1L&?%maT&74j4!71?14ZX5TH+S;CLnlr6<3ir;X74 z-omD9CKbjIzxZQ0!0X_B_t>Ym=RngdBb@RDTL!9w-B!);K$SEBO^9I?J18+_qf#(L z6|pwhO^xD0=wcFvO1lQ?{m&YqvbI^eCHT_KbW2x%!|PCywCi9`b@M|b6iv5UId^gX zG5vjPEKi}koW(Ry5PCS-Lc1|)S$FR+l79$e?oHNqs5x%)LR<6eQP`iaP@UUP?AEWl zb0;AhBH6o{(B1Enpn?qC>-^7B=2uL2XUb3wKPNjQAips#&4H#>yy>ZuW2x_lm_TV$ z4B!9mGynp!6^XQt-(LqLM|nZw$2DxXI3dP&Vi|IZQkQ6*B)IZbYzX(x;|>`*-%y%p zn)r9Z5D7k_~1a_;7#?lP`bKRwFNFc zOFcT2YmK=+f~$eC@CQVr9{OhNK$UYQ zsqg4k#QmubwhhuD0(1!cpg?!?*fcVLyGvo9=Jxr22Ig9XVwXr3K|UlCbOPW1ba~@f zX&8c>X)d8MQ9z9qN>sf2`xCMe~7 zjhMuk7#Pf&LiI59lMsw8orbQfER_^;sA`Dw#qfc&%ym{?sU^&B(Rq;pE{T$6sc zR`S71310Sq%GvdVJK{}GubfmoPyqS8I()*Z(TVA4l-$P2ohTggAZ#&?urc>@`~ik` z7jOzp?}vOm*6e19?KKqjj^QrdXh$_pXg-+q_I$v^YwU(8?181SP)r0*^ToucuCZovUl0WcPN(90M0s8vx7M{U%0x zk0~o6{P#wskJ?3OS@x^sWW;NL%9i-|6%cKfQ~kzX^rPa`%%_78#1^oKo^uHs*IZQC zrv*4hnB$oQ%=LT^TK3{41t^7wv0O;J;z?j^mq`Bt7(D$;4(cK)&E`xYQ_%SFU1JAw 
z7pb*lw+%4QCVD%s;=0rhn8(Ez!5ki`sjl?*jbQ?k5co2n`Nfl*HL-@(*UO`^by-p|R^F z8WpRLjtJI9Ko2R?`vK0H<8RZnmhhKT%T^FD_*o0X=%BH6D_%j|GKEj`tiKG1`i2R- zpcCmyDPB=)zC1y&@778exU8n4rmFVSA0MMlLE6Xq4a~Xw^;D>cbB;_fSCwBf2C5D`A_Fs^?Q(Ab z>eo8Rz_J`WHz1s#oNPiM|0OR@E^>D1-2*1M2s5B(zl(Q)dRbFpU8Lg}6~dfZ9;p^> z_XV)%m_o!Pt1N)~ndOG49+LD8pP!^Yc-+dZcb23>QQpmi2qtjd0e%pth=*+OVoK?@ zlPQ!CNX26J6$8yFwBb8*^K5@%Xz$e7^P=|mRIJJ%n+p?th6r+W4>zriv<(QN900eH zcK5|UcpUTCgGT?7<$y3i<8R*L-dOw^WOrC(On$3gK9)fEMXOADG?1nQ++RT*F|9H3 z(qNmG>+)oKJpvAdw^txu%0c6JXG=CLT6wbqc%V!q7}P^K<%bQMwTuZ1*PfDsLa@|s zoh<|+x(Oj=tgq`ndC(<3^G1rDvv}&ZD(|ug@YFRh8#=XT?NGrRwz8r2k!LL!8 zto>juX0bPJ8=?)u4Y*j?puNv1fOh9`oeC#}$OyWjBY$Xfd)#?*XM1iZ>_sepzwO(_ zt%N}u+|y^h4A{Hh-|5U$WxEwFbBfE)zXt|liZ>}8%CvtE|xxR+ws694BJ=c z;ek!)iI?@#uCYnA)HBkP&laTR!m9lSUZzK*e3BKQ1l_tNfYuy?U-y1?Y zoVw_c3s6;;W9rTKPZCRqRNOj{9NpyA;QPR9u~ej>^M86}d&hGXc{8A-g+eO*K=xIL z2M$liE!9@8d^>25E@oMjgDQB~w7@#y1Cf$j-K{S#Z9BXTKHiFd$S<5OG46UFpmq)T ze)4=3P#hE5U=;#de52I0sIGigJTcWlKinI^OFV{}S1(gdYCqxfF~^MEE+;ifErCM# z@xrkX(|y$Lmy(L60Yqyh6Vw)u#QQY3r|k0JQE{$`mWr>tw%kv`jwU}1@h5*M zcJ5NjH^l#55=k5klx9j9R5sXZvuhuOnR4dpX6fRw{etB#kWYSidsvRjV0kNH$R77J z9EsrA7xojI2da*ogCz$3-S^GvI6>wn{X(nuEYoii_?{nu7X^Rv zZU^DomTIgbDUihiX@O}2!UuCW0XdNRD^MMz6sPyXUU%7oxH0G6s3c}F<{N#}TY)Wx zAHX^2g&55Ygb%_O!xOduxtfZ6TlC8(i%Q^I(eAP5f9;Xq$;j@%BsC$hQYgh8I2ctH z4sdce@XpL7{~u-F9Zz-t{vSD&)6PLVcgqafGgMO9j=c*ZBYP{f?Q$er_7<{7G|cSm zS!A!WN&T*uqI8eX_xI2JP`A!`kJsyZUDxw^KA#t_JrH!C%J_ovvS>f2@o1z{igHGL zTs|NeR#2$50@jC$v5U5;2&8>_T5GhLcfz7T{~)k|!q9?8L^9C#JV68s+7#bJ7vc?| zN*H$6Df9jdVV6&n>!81U0Dx5Uui(Mn3K|GNM$WMMA0Lq%pkPur>@D@OE@_3|0oy-6 ztz)5}hExQc6uEqu_cH@_px_waZPO0P%D{AEx~?Q%c@FvoTyd31j)F1`%dxh`oLf8j zr@hVjtBS>odSR1YgC0^)Lne?#2+0ci_ki}KjIX#-b?KWybFv3#;#Hk=-I`2vDo=D3 z1kYYxum{sjuDB*&W3{-!_6&o8$QZu{kPvBN8R(j%VMIMxgAPo|A%5pn)ca!8sGnM> z9}xcpVeo~I=QjXNX4M1@E*gA+g5SD+pGdc#C}YkWi|;eshkz?g)&q*u3eEe7?gapl zBBTOA6OvK0P1f;+qBAsG`5A?l;7~~}0Gj3DyRoQ$@f)ZMS=V}i@aO;ZzCjFN%on43 zMEg_H)M?8^iTL1y)1cqh3Y@5*ZI?br$AKanLOZ7U1>ggSlZ~e$5Jxi<3rO;PzC%Wc 
zaAMC2EHQ;8r8GC*DPDX9JIW2a7!m3uyjuW};>R?IKV>c^m6Dm1=Muaqq?daE8i9#0 z@FZFb6Gdw?3ZQi;%BrK^X_7|kz~l}mcB8iU$=M&VxhIk_hds^ISTelFi|eVhO+QxA$4-$gwV{nM3rJ}IF=YnS zkmVKv?_UN%0OV15t|^peywO}@s&QZhwv!hxwJ(!ub}iVwkfX!kW7DN9sZh0~zmm!! z9CeXi^eAM)dcR+8)|D0V@S&EJn3H+ow#WN3oZ=CeT_=XSkUvVbZsfeu<<*ev>_p1^ zU)wOu9qCjyZxvK(Bw$4MdE@$`!h$XCCkfJE#Aw}A^3ULKds6s<(*Tv_5xRurt?SyBcEgTX);RfC6e%68lhtic4>bs zzc?33-2JUne0~;2FC6Fd8+`*TzTi=|0k~S{AaYI3KNi_PYfGFc0YS^x^F6Dk6WDN! z_(tx~NjGdjrOok*j~uYJt}eq`i&pg4z(A^$1}6bv(+yvTS#+1ULI26`WmFd~e5swL z(!ZUZ+4SRi{ByOm9njSY8nx&!2U)EbRKr%LHPV03b9?eO99pY3RxficlM|iV6 zFIk2T)5~QRSg?#lk>LjIYGqf1Ei@a6#dfd9xd6~v$7x0Z*7++^P>4)4LX!gxmB;{Q zimuTes2t0byW@DqK`HS~H%yeJ97qt<+=27&p#bum>;pufEX2+FsFFz^;JNh`+S#zz zOY?9#t9Q4$=SdWt{QoV9Bq+hV0C({e;K0K?FFKV%2U~d&KM5-~fz1}+=c34QgluF$ zv0(O-^<>cD;zfAhw)-!nY34znqYba|p@O95?g^V8?Lewqn7!P8#+d~w5MX_|@|2tR z!lrwxSA;^*I2?KA1G3>6NEjB#ZXn;3k^@wSYs>q~87E;Uwcz*ivoHh-)RscV-FN8 z#8i6@Kvzpo@ogheYI7L2FdyJHi}W}m(1k3(oZbcK|B98_xUl`iQAdIE68D_Ca1hW8 zO(@sJREuL+joLFa5cCq$RX@=Wwkgjc{fO!o^33dj`nzF&E1I#D)rqE^W$`IJa~T69 zYJb+{@GsfEuEoW6t5`q;b`DhDvB#`_Azt-lVdU$cqm&h9`qr5nwPDVOEZ<8bfU^Zj z*#IQTKLllAQ=$?pJq^ussO#Gx1H)TAdTJ|3@^sVMX8>g!f`pd!5U5sYAMK{KNj}d= z_;?gMxd;cR%=?O$4M3_(hnm{<@H}L02eLq}^1Q@o^2P@s*MIAzUwk6bp95uKK)>7z zYR_HFth@gGd9Sr^5ZnUdfla#5SlrP$J=74>4p>&&nQ+7+AnfNKp)|8~hO8kc(O;tx z2qB+ri$1f|7+V*{5{f9kGqCsZP0xte?ggIJw!cSOlR$O1?@NfL!XzgBSik5K5;6sJgX~Cb-z+?PpO3EN5(z0l zyt23ZIsx^ixL5KrME_AJP}wmwxp~TV8EkV%LFJ!SvIOLYR+LTv=}y7!&|=GV;G}vw zH?=|sO&>7Y+n}y$Dy4rjak)Q1A;n7U#bNT=1;pov8gJiA=bEToYUPc&ndA4H?$r2Q zYzc4&Ah9Oy!Ly<`pz{Y1cf2};#P^7BerG4^O`!5Oi~&<59q1knT@F*?K))a1n_Ga` zX>aVab9+5$NP{C@N_zS$a$FlcC%J^(;W{H%tIShugUX2*0|Q3v7G#vWqVj?yXoaH+ z(x|3vQ!P*zXX0-TMftVCOiTu=pg6|pL_09FebAuL5G^j=1xC5yE0U9jIDCvUD8YgS zYfg#>w4l5jUT8Yn1|Zuo#2y!IY3U_x%>rJ(e^!6j^DF0c69eoKTf_gWc06~P9)#Az zDcv=SWzKOj(EA2xF9u*dlYLRs(CM_1CI~K zJ!{8|nhD~Bg~W@_l^Zd3IdO|XHxqG*gcZTK1g__gIqbpzA`vL9O|jC)P}d3ZA@m)C zWi{JCj-bYHER?tw2F6{+pFEGDi%aIvnUZ0a0hHdTKJpSBT^cP{A~;4NIz5iIrY{V# 
zZH_Ci?hu@HvDyUbsLQ@AIY9}T%3HBEawcs^Rj2-N|8-TA_MyH8X)MCvV@7WdD>&z4 zYw?2WrJjexi}kVbgsf`nHe5$DR0)W)lnBgI39H=mmUv%!MJXaZ7z=bq-zXnEZfPej z#h)n%`3@W@4ckLt7frZ|eTKkr%4~Foii6p_HBCo7?>VThOF`$|P8dVVZ)!-eWoYS3<0heO z;%aTh^^p%)L|$xuc<@cDKcF3OU{!HPBr()EUqo1WCF9he70>Qun=+3j3GjJTE%c&BbS^y|?5`O~lT z>?&j_d)f(u`*cDs54b>FEx3F3nMJ_0BsA^pX-45xTrcF%frn#8EZ@}?o4ZjmF+NeN zGPsp0BvR4VpWy`v9gU6;M6Y)QH3prxIC6ron2>t83*d(zah*@oq8$OsZh{aHxvwdF z31!36SCp6RrVXG=p_(P9nQ-{TNUn3KE3_A5Xl)Fk5Iw9yk#a`2`k+B;S`;l+;(Y7J z6GNLYrL&Rjbvfp`OMsGVNuJ=7-*hgA4ic5iN@t$cq!4qmk_NaP4nNqDxID-)dQ55) z<|)g6Y~a<$Q%JgtZ{UE=;-R6`DC67G;VI!^VHPgw;Y(#D-{2+m#xs2@qy@l!Q>NUKPn zsak405LOfgM1;792}a=4VkO6<6a@gniG$YpR$y_ILaU{fZ+?@|3H|Ko(+hQei;9H~ zIT`v5C11C&$=)*NRGWj%Y$>Nky8?Du)-U(=16a_!r@#G6jkkwe5ZJhd1OW%DJBN0s2tvY^t_NnNrYD1Md z=ckz+U}5&$uw17!|>a2Nl)sYY_96JE7w+iAq=y0x?j}=@Xlt=QYwDcTgIkI+abwVsF0%GIrTi8bq+zgA?>9G zyMl|yp>gS|a6(@x&A#vC13LYIOA>U)dXIVW$wXe@U=4X|&u8?=Erv_#8XM+GRBV4x z7euTvaGWxkP5&Ie;kDtc0Hnge8#Jqbb_9Camul?!^@LbsTbCH&pKlZ`# z>+sS&%|jm8-*Y#I)|*%e7H#_%Y712ebOS!iUd~2>rI3`^utc*@W&^YOJAjm)0SR>N zlq$l7a>NVSV&#(p2A@JpHAVfa{{dF%aIvfeFkxB*`-arv8K% ztuPMSXWD^>8ddUv+nN;b)|2( z4)}uZDV242io;W}LaRrR8bj2jrjAbN@rRyJ9U*lD$Z!${VmEbTE*tDpx-JXhq=uhK z`9+yNS@5Z9+Oi%U$QrAA8E?&h*j!V<2Gs98nCm-$(P4f|{Q%7d!_k)1N6?YY^w$@A zL>y`r0Gx2d?QbifEtAx;Y;Z2Q1!YWLT!4Hq5=Ow05hAa`!_TfrBf;-lPg$Ts-WI}~$ncx)g%mcW$qpc6qGD)1#f*0Ru0W~reX5O0kf)b| zYD^ETM}z~c;C!$G6VszQ2mcJtyBLVB{nPF(E7-S(&u`=0d8PLJ9a<1cX4{U_&maM~ zzqTR|)K|oZc0I0kl*!D2u2p-qcwqksKO%uKDD_B(yCLR_siaZM^J19V84NYYvWPGhhIvRaX7_LTCA=RinC76bgP&@?&zVFv7J%^L=&uiV|k_kqKkg)b6V zI9wE1j(o^r456cVvYkFT`xP`44C!_@e>(KD#{6>L>B_$&3UIRZ6~I2QwcBcbLhv}g z5ZP%pFq-8$(%{nI+U*ZVxTNB^dULKk-c+HT1(dJj>>oqpVT*K-*(D}6Z}I zyH>CbW^_VOHiFcKJw@qMM)G4d188cp=!CTc_#J%R&SDZ$RRg4#g+!Vs&`*SvIw7z+ zA50FwER`A`dieg>w)*>&NvD}Qw;kqdft2HnTlINmsFD!ZMh!S~O4~CncqNAa4P98j zXmrHG7&g^>>frB!!4!O6mz+gI4t!F)4R9~HJ=+z}?R9=1Y_^Bj3H!!XPHZR#G?*&M zP#c`2L+{BN;wSN`uZ{z&fgQF#q%}$zpvn!5w5r2n>k$eU`5aIJj_dDf@Dwck!z^a6 
z-g+MPiW9S{aETVvaa@|s01Q(J@p@i(Y6$nb-HO&5LbVR)k1Xq6F&vD->bVu5cqg_) zl7EJPs4qZ|DQKZ1^+NqHXgrG9+=PGL6d|(H0611twhDrZz+QlCS;D+R_9l#3JZX5u z{-{#4M9Neluy97f%{mR_9?DbwRb!9^dlsm+l>)DAs$&+2Zx-$_IQ|BJ;FBF8AKz#s ztzrS2ejOKYNoam!Q=sz_fTCX_3zj_q867)wYrjIQRI#~8MPX#=H9O;{=eFC;jR&8L zDDRTOJ?+h~#@w?t@`?Wjn_9g1YTWSK5J|S1nB=!j;uiT}91w6=VBZ-H(5)ab^y~Y{ zT@AKNaQEnGo+>zd ze(05*z#eq3enOF)5|#tjU$03w0V+`%w9TAuYiLbLUh_LwY)54Y*L&SDl6?m(8Cy?E z@)IjMJAhJ};qgO8CEjXZa-#l+Ysuj1;rYjriB{7Iq&~WIojO~dr`UXW7^XBfsHg+) z*)rXNh4oXfHq^}Hwk=qTsg5$?+W_Ui7EbyekGccV8`8gD{?@((#mRTxi>cNi%v9pi zJf$ipq)Z)Ta(15ilYN@I7VQBMx!R!c9ioO=W}hmoI!sG_%Vt1NN!i@2512q|E;d@! zqT5ZbOZGVwec~zKmwCaf190M%F#4QiHecI7)~W!RM;ksvt8VmPVVEAQhN98wXO=K;;qmNnurdVu*9{nYn5#4Uh-)E z066uL6fWad7|bM{sOs#g?y@an!s9Tf9T+cCjHDmOYLCwHpe9~>ynbUl)F5I)5&G<& z*CV+RyDnNJ=K^`U8+dWh6h-|v`hwPPh3CqBq>yVrs%9qm(sZZQpdrUi2?|VG@17kJ z<7R5NAwBzrM+j35hOqkTfpXbuC8t{|fs zHepS#c#IHO8>n%LTHJv5B+~4ds9wM$GY9soEEor2>}8VyxKV4z49F64lLL+4!eJlC zvLm|vOs*Of=7x|2*9re#fxv4q#vnz|uQ#;v(~m{y9S!%P|7xLCOz7YcUmIR)u>0l9 zF0-2r$M%SL9@GcxO{I#W94Q(?lS5OAMkrv?=A;J}EUrSlO*6#Q9s&XiDt|*SDCY;l zu}jJHHA}nL1@nD8>LyBNIxat~=#_h-mH)+oZfP*W%MjF^N=Lo`5U#2L=liz@D$Sx) zZ7{&;!}MoB1=^rHG(PHA;^rq(Wv666RILMPPZlIlXFf!D70jwAD(ZpO zB&g#94)UV%Qc^wfwz*!70PkJv_{M46p$Ap4r+soH)(7)UhvnWePp((PU_y3@@>2Ua zyh+j;g+#^EA97mA0Ps3z_{cmcwu5`$)MJ`+%3>Ko0>F4VS{$UbH`OJx!zz$Qv;*or z@Lo*-Mkn4HydtXe+9`E(5T~I0jZvWX%&Xpu^3)F1m2F$IE)A$T10~ywL+7DbjH6*7 zAf$Ym57w0(5Jnw#X}bUMmVXk!{8p-QBi=smbM=QG5;sWj3>`=Cg(1ZLM>lcIbErE2 zz=9#_t)cCJj3rPe9qFmTTQMIwdL4+lY?#;EAelp1g+1UvqhN!4$=`_4?jV3G_~~aO z%Lk0Ji~UESW;U+nY`-6T9st)LZ;9T(q>~=>TYy1 zx(ag^;Ouy@#ow$%n(uM|bxNRT$8vkP@K*fELeN82LHm`@+C%337nTW_UH$&@(WRh!i9Je= zl0euJzrW82A0tr}%kS}HvrmdT-Td#xM6-3}A*va7WSd?%CT-FJl#^j?>jZp^Cv7hz zrj0=&c0kc+q&Q^qOw1D1KNp&}aw1S)yjT!RKr|1OutjU`#hBC1)2Y@}+UFLV+r5kI zx2qM}+zOF)P_ayo$U!~%NOB&UdS=afQ(-pFo%i98V9AFKFD+OpODvT70Iu9Awtsr? 
zgw!R2^Zq*{n8GxVcBT)eX)mcK`z3}N1{t(xCdC+omw-Nmo2(cPb-D?FG?NqP?W0@_ zUvDSGnL?TW;he(6heW5v!zPWI_$X*bspOb_`$l5xpVg!;JJ}z-dtoWXrDsgD$vv`*H*T0|x(1Mgv3jwUWc+du_wGKeLG3EYjQ+wLr`o|G4f zPTu{OXuWd8dK&>~ly++L*)5$DU!?dW;;}pnaQd?j9|Px>Ef;mPkC`cdLvW7>bee5J(M45|Y=n>8V-4Wl|_Uuo&^11%odaVY{pShHx zuC!zU-BJ*7GatK|k`j>-V**bj&M+a-1G{Nz4_8bHlu>r(9FA7OhbWnHCFXfA`je`$ zJ(QJhI%`znurF9irghITU)9mF?IO~M9v6sbx%@xs*B`5qsuX%~m9Ms6 z%Bn=o9hW=xMiUvGvCTut+v<|+RShj8^51L`@ux7)P2-uQF&-sr?j&` zJ0IWA?vbuj`2=NGQT+0l4|fmPSMgCA$AG?5a?TOVIUYqz0tr)FAV0!@Mi1BnJ1vR( zp(N8rfTJUnz8~n4yYZOJ-0=slzry&wt+hmdp50s4z;F^31h?e9tcaAAIcEndAkkr& zkd%m0P2h!%hqNy=_S_+pc#a%1Q^tdCB45sdPA^K#EEsM~_Z|o)dgo>E9=dEgoRq32 zLKQqWWheJP?Js37m`w%WX+_Y8@v}oNx+LcH*UbAeVI6)6rG4L2WvgOc&qUOD?UbON zw}z5`pU0KzAa100jqw7`S}QcsM@RjpD}intESd~`pAyeQBcs%Ibz1h&8Zdf&eUC|}j`@r1v6?r(5hOCeY9ehn!7wI2itf^~y|v1Fr9Je>|_wLtrSPfoUr z1hU=Qlk=q64bXoMgzu(Qg%9gywu8pu9nXUXJ18DK@B!-57DY1I_u{n~AO(3A4{Dz?m*b6iitix)a>Zo*KDTCP8buS}Re7;Nw3pC+dE6TK zPUJbN(4#rFbRuU8LKn9ma9jNcoVa41Rr(Ez-C_)s+WspJ{fx5YYT{lN40SN3R`JS}XHj9WAZ2uL3mA+bG!^ z62iei5Gdb|O0Yd0V7TE*@SS#MVZKg3H1Z9Sg6b0p%o*TUVFKd0SY~cCP+`l+iAtP* zy^)_k{aMs_v!dhjUDEswDg^l;8&vr{`|bfymGYf2=(as%u`h>q$Dd$l(~&_Ww=}aH z;N#ENr?^ZaOyjDfQAVSDAx6!voGMx*z9Ol`=^aBu8t~K`QOc#yd7|wbG>|?!6l^|h zQ)GUCb336xTFBUV*e+=bngT8G848i8GEM>tj&Q~0kD3r(9;lwgVSOw1f{b^k?lyGb zOoe#(LAC+8YE@btyW@J?V;^ZPVgdhl#6i}%LUY&>kJ5iTUGi;TkXSX-LnPr*jzXdP!Pck2G@A+yMlsX^=FYn_`f`}{OEtYQK5VWnPYVBSF4olY7P+7Fj@QiH9V#?Fi?I{8FB zNO5ORO?4tX3#%4e+#gr$& zbm$$Id9=&h$Lf9EK4=)`OMBsNuq+sI(8Zmq>XPO<7u_@pQ09k`yEyll3BiJ;w$s6X zU_2xjo#Os64qf3uhswQpFGsqq%D~$*ga*15@adQ8(^=S&^B=G^gWQ)uYuERr7O92- zt78^Oe+0Gy9qRUD>?p6-v1hCZZ-#@&{T&qaA#604{7G21KbEMT1`!+ z{7DzY8lz31FEe=aV0RM)M}nmmeVGveuhj!)U|i?Luhys73?obMt_0yDgS_VqWJc(6 z`5Fs{1wgJm3tWKgif^qM1`6%K!#LJS_dw2E51Ocv44sD@b@hx60*pCiCB*wN^8gf{o7j(Vn%$5(?QV0^Le zS;b^ySbx;z<{}(VHz?QIjO&o=JO6lN=Ky9a1s6NPNIJAtcFa=U3XEn<+fLZG zLe~*FxtJH8!<~=GEFZpKqi;00I~EPS9x^||C^^HpF^R}gSD!I%c*GveDUDM>)WBn) z(A%KglzZ{Ld}#MzScc6EdH{*^j#=eg>cJ-d$#IGIq1Wq4yFn(D+HqnyPh#NJOnA5N 
z2Z`dWI-R!WAT$Vy`dGvnGWD0~;EM9WK|=sCUBr5b73KId!0jqCCA#-_ykLR4huwzp z`?bB#cQ+@gG=t@u!hlG+ZB0Axn9=cdUd!t|59yX{SKZGZV$+t*vq~wj;5FQ2) zf~h8fSR8R+k^B`X5v^cc0y-e2=rjE_p%Vm25XF}ZMrr0l!_&_dodLRlvWMt@L*}0d z@tRgVhk4&=(I9RGfQ~ZoUd7DEy9)A#q4Nba5O@2XxS$_#(~H9vHI_#jYk-y)wBe{4 zK5+cm_|V656{Y#uqu6dg$d%fa zB7kDYsq;M#xb~-{on+EJ)0ACwq`4?Mr)Y2$^h2|NgYXnTYE$qh_6OP-KWYW}SW7?j820*=MG(Y(v+H}NRCgOqz_y&5iya3gOue1aOc3T6#{LB4r?@>-)Fpclsb7 z_MP&tn|n|wpdC#mm$V)8AS%#by17A>%^1@|Ya?qQXdu+wIP>WlyDhXN$(3#?yZPNv zrd-@1bCMV1O-Cg;0^oIbCiLa%s|dlCjnEfv;Hg`NmmNcPCozmuhx>g&6aV8F&j4-mO+Yz01UtdE9CLhJ-l-E)e7LKx8@pIsX?hcQ@|Yw)G!RU2Ige(3s9vQKMH^@x6QYuLfPok@Ish%R^kaiqWp20I0Q} z57k8@ogS^8#N37MV~gLs{axnSHVJTesCbdfJB1}jlr9bD<`(ijR*U!;JrM>$y8|Z} zFH$_blnjV(dB7X~819jq&|uDy*p)`yM9@zT3oEkl`pB`E#f-x$K{H?sx>=Sb&r)rw zPR49A!_MQ6*nNG0UaMg^whvzv)kG*+n9#AEl5?v5Bf0vgsREFN+#vpxFBfAav4wfd z>MHorIe_eJeFO7n%Cgo`aF#*dv$6|0T&p#dloiQS+SNa-V%EP3szsD7$l-s(${j%09gugm1Nn+QvHO<) zk8gFBfzC-Xp(lso_j@CV6J7gdy1RO}>r?fGzu-o1c2HkHjf%eb4ZVLjoUiN8{Pw=K z6bkwW{RKDrC6jfHyS4IK!WgzPTd&>v>P@VBQ8I|#-S%Mrk4mpEb3?>`S#SOJt&?h0 z11$#egnd(2jmZ}+vuxEJR%>Vp6sv{(OkNy6*R$l&EZN0V>wvdQHNO9_v zx)ptnX|*<8D}Yv88^3-X{j~Os%OPzPjeh-0{|>Zh&B*`5r_9rUVVl=ei@Q#t#MA$Z z2nl+Vpq|@uEAx*DBKk?rKwu*oqW&wT|J4nJ?=PARZ=D&*PTjt^3nAe$}JLwXwc#*GOnczN(!RD0E)GI~%X~+zmYV0TqC%`R3`Q9y|%QuG22J91# zgbpz{L@OyICZwfpx;$|pi2hhlD6IT%O7-BuPa@$&`bTcPE=;d}4?M?}C%#Jqu4$f; zus*ae&=$r1k8M+gX!j0RBFWIJcQz!gW%|cg+V3X_S^F1*B6nn(U8d#y>eVa0!;a(I z3Ly8i-t_t+`qxfdC%d;2y)51*!9*EI#^>i|efRVqH_?~AL2Jz>iujb^hO=Qen0!EN zdvFicd{KAcBaGy&%@=R{ygey8sDgd5yp)JP1JNy%WH0?cTwc-gHgdi!?eabNUBoKX z2YId(4LGrTd$7Y8or}jtf|j~Za>50YfzC@8*oB2&vlQ0}RuKEl00( zBYDjsPb_cLRbL~*I%K! 
zE8V$%cTfC(KQqF1So#$uj@O>7{N6(nnzfa=db?{U;kw5Fv3o!3aT~_%Yys=T7D%ZB zh>?l@>c9W_({C>v00S_3=zdE7^Ir;@5mo>GxK;w=Z~54-Ti^zqD@db#zwjy+Fp+g< z$GUq)-?onFzP6axF2IEmj;q=U=Bc#fO8Mz0+b#02du9F z=9mG_G3gMBe+2opkv$KxHqmIQ{j{zX+^4wF^n2}tv#Vbc#z4xfu=)pB_}?HG8rOIb z_Hw3sdh<&tNmCK@1prD593F&6>_r2?6TEA7jOz?>AE<3yWIej9BwhQRFOi|jE9W2I z_!TRJk$BP<{PBMysRilLb$~^Ml%N`^>OIllR-N}nC$rUyq@qS2@UY!`S!i-|cq?cB zim8GF(dFMBJbX+C?kSJub>kiG(tmmT5pVnZaEC9_MJkZJRePU4aBEk^(e+DX*W_`* zzm3CLw*ZdWcnYs3OU{&1+`kbk-Q-gMLusfE!eXwui;Uf7TK$DOrl ze!}wGyH{E4QqW``38k(>^Fvm@5`J>JL2Q8}^Hfq5D42cOPE%DmvQuI!3|jHtzVWwr zlvcP2d%EQ5@82Xyo8(;ob&5K9CG(SNB6+(0MD*5xkNniUeDy^RC{6xYkpI2^!o(mb zD@5NA@c9*JAl$zD-@}@U5NT{YDHf8p; zdRpt>i#HRx(aPYfLGX)$j9BKAtLtp#Fh>7bmz1tv6vRwI9nk$0F5KX2SU;<&B7Vcv z`oBTT`WJr*r1u_hqsD~TA0$G-aV#^-|M6QHw)81Zs#09L==Zn#^422W_Fpls%n=@u z+4sYzSNDFIB~i)WF`Tue^w$?SqnT7DS!`3JbsN_&!PUgPy8P~r4W@l^whzB_4sXA{(af6|ALi2t^VgnS}w?lvVuzM`>DUBW!{p+?*DTI zZt;fqLX_(-2DRK*@-WVSd?^g}#Lzh9;Mn$0llcT&l{e}P-SjK?USDHa0tZ6Bh{3u~ zq4Fcyi!5z^JTXD6GtdwBuMJr99x!m~X1Q6T^lRwf#q3QoUv*E%XhGC-Bl@sHL|*?P z>+gQ`PpjWBi7_+32TQ!)@rvx~*Z%yS1-hc_bwTsbqm>wyeN@KBe0~H+DtqE-_dh@X z3>JsO_4Pg%HF~G|M<;|Tn;5J_u;YC!K}t$nLn^)9`v15Bs9`q~Z~LheZP3`Jcl(#Y z57l#Gv2EyzieR8jjSf)j8LvO7qJv0RwN#z>^SY^0NcOI)W+5ypI8SQqU;s;y3!Vqd z?+G1-laMx4?zedO^Tic)v+w$L<7&!xakVGIWPA!w#;uay`I`RgS_4NTY@`-F^R}O5 z)Y=I<>9`%I=}K!r<<~5@V>!mQddJWQa0Yl(G7IYk9>V$&>SA*xt!~i8*^0 zFIx0g&c5FwkDxxOiQ0c%yz^r)mq|WO$XQi~9;fKpwzd~n7xq8Y_Eg@t;gnc#zz^<>#Vf#gU z3EB{%+6*}kySWPh`1O=V#tdi{41$DPk=qn_tFZbbjhJ6dam$a}rjaIMTqho0*+Oek zx)@pL&CfmE_v3kcXBHt^xX2+(0ufmTrpTTG3E&XhU*uF-G zG4J0{J~J4u_wSof4GxsF5z&bsk)$uF2sG&4jUoX^&~U?OFBh@W?lAm+t%4tKFbE~x zF~*y33sG?qzk^Ebi$)L7^D2jrgWT^?G z*Uylo|-fN=p+zrd?f2MS{jL(71pl!Jc=*e30OED|_ zD`!4(&6dAK<8`@F-vc-exy_F4UHKMp(D`C3kLC~<0YO$N@GK%8H*p@aCyNDgqUCr; zD-1HZfpp=}t7wLK8DoY#@(aiz4b2wK@{C)LF%otYMgN`kyg=J?!iG^mw`>*Ap*rOZ zWMpn1}suTL2sT;q1^eHcJD=^OVVrR-S5Wd$| zf5@k8*lsA(x_N-ciJzzovk#+(t-qq;)+^z4FFNMV7SeoA&2!IuR0$37?josP-`W9-`W*n;* 
zWy%0)uOhIT6QSu}Z7(F`OjgT@BtU7}?*ky`ma1;Tn-bWaRwP04~rPVn4+O+YjCaS{)8-Es$kPxIl_|)c3Gv*pzkIR za-9`(ZZsUWaY`i<-1Or8+saaafdY?C0I%_A(`jD##8eOuwjrS~ zFcOoHa29pncmiF93BG8$JsO~Sx?V=;;(k#GcnqCmV5jt8+V8q97{en^n-*j_fi%wP zH(#0O_dOOB>qUce%g-oj0K3MFEqU`Q7GTkKzugL?$1vLQTI%u8&ac4h6*o9L`++?oBf2K+%ubkHXk;;SbxC<+ zT3vp;u`+#(k+Safm?fOr0%A@#c{e>ibGywV7$|1+`pn@juL0E@CXokQu~+_Hurxl@ z+;w62}tKgVgzq=It)W-O+ii{sESzAoHX6o{3;(w;h-kr=%$|!OFD~*B8+vfH98?H92xUrDv}%eUBK?StNI}F_+pP19Z?s_f70E z?VTV4K<*7(y-UFB3o7CQ6Ez#L-VkuV$j&9}nFG!b@-9yT9VUmv{ABjNQD84{Hsm9U zIdZ5$x)iYP=F&wYZ}}Ck0cg!23{o zH&fDXe**`s{Nn=8-kB(Fh|2W!>PCoUhPb;82(2MXLXN;Gtj__8vjDGb%+Je*TW2 z&|jz~!kn_LlX~vS!gu$!NB#>B1qJ^g+#T`>Rf|>-_g?{1mj{e27-rgRQKecOL9S2x z@onT!M5-qvC1#NU4Eq{fBDoqxm!BUo0mm0nlx<=gZ>@70PmwD-(-9>3q?}QG>AX(G z@h5{F;PEyJMg^SvPQ;U`Fxq$ChLb>vOOHWSr1M=Q!d=VioF6>Nxv0-MR$B_Z`UGB=TdaZP%1h*vI^!E9Qv&QsBf<<3b-vUjB?nDZ%Qx;oXk7S9_KFgL5pbu`$l^5N&g<>n0Lk>md{U=6F=LNA+7n8 zCHw@>L?++j?quWkHwiIf)}LLXk*k=Qgsx|(g$d%_wseI=X%)?Y1a*qYsb$-Ii(MNX;}v11|==xpazQ& zQPNJEgoPy|ZnyqcOf~H})%wez=AC3%wBTqG&%TAOx_r`c-6D*S8DVVS0mo1H}NB{3y#$dh*mu-3BBN5+9mV9D*M2({f26-F$?K|{ z)z4|Z&fpy0vri}cOXUY(BV0;IfaARzs3x6}M&yy^h`N#x(c@=vgj*#sT^#el-5d-k zIxGmmx5yKbhoC!;LapY7L+9BGA*nhl0tOrzVBaa?$=X?0Al(z>;!dZK9Heh;1&Y7i z;Rv8yCJ#rc=!0xF)8_Nl7+WOxfTsaosmw83bzDVfvgKmJEB(OEqz%l4T;GSl*qM;J*u>DR zH8xg*w6hGZr~cO?SwOSBzje@GKyW}`z=mESs$)btYT#HA!!mpxjxN%8|e__g;F2^_5gEF-WN`**EBzUI!bN)Y7Pv*AG^g_zpT4 z=CIgQ`Lj}IW|x@)VeI3kv$W=f!~80dW zhlyKN2|FYQWeewdk+zEeqrB20PLL3*p3QgAz4_U>%?)-~zdT#N1dc|z*erfq0oy*bmnUV(~x6vjsbCku&RzFI2pBA<>ur4&4w(;`B53$l$5 z_)Fc8WXX&)#CQn{$Z&Lofk=TO5V()tdb32OTBQeXe^ zdb|UPZTaXpi_NhX`vq?1^Ml2C^qdV5O)5!6HBRNp&}T3nqsZFI>1z9-tzSPNtbj9j4mPx7{bz>AQWM;O5EeGfqgFC_`O@g#WF z@`{RsBy~uB|AdZ8+Q0gv&R4wmm3TM$T2Uc9@-X)}CbtYVBaa-!Tq$b$r-6d>>|7w{ z4-(S|aC2cGPmElOH8t($9Toi5LvHHZQ+c5>Sp zc;^xu!b>tl(KrtJE>>Umc1qP8;xlzMc)viCg+)CspOIU5xn|RlI$O0qm0CPDUIx zpk8vugd8S|AH7yB^q7)!`J}%3D8c0wE>>>zO|%OHG9x?jZxe`sY}XgTQcwWVtepM^ z4@@B$6Hn>--U%}JqaL^ykhEjbZ#ti)wduQq0|2XzqUb;u6g%bzOk#SZbh4lt^#?41`x6y8 
zgI}6V5ElE6Z}j==Al3@)Xi#Do;hmfA5*NJ)HOzxL^JokR3Kh+L^e4fx#=4C`CG(xF ztmpjW@ZA1wL;&gFAf$Og8{GG$L=#KC`;T>h|W{9#bH)B54*9}wOyK_X6PU2wNx zVll^f;AkJ1_sl->F%2j=h22d4mN!oDG^?96oy;suXMWoMEK&Cmua!H2y3N!_hf%uS zB!?yK?wshCWzdagAP(V8fJ!3$9Kp%b2pGj7PGgsjkCu-aGz|cQ`W%3x^;up~OCUXouxIa~JL%%N3mvhJ zaKcT?Yc~|UgzVjoWW@JPF86S31VeC5Kj?Q8fx$Zqr7wlQ$b6Bwph>pzbRJ7>`A5JZ zJs7X8BVDA?3;sm()}SmRu#;zW;$%;!t^3iub$(np(@C|81?)EQairr347l*=Bn?BS zq3WWaSLHF4`$xDM?DxDLsY8%8W5E%*d={%zO8WNakTlj7TL+pnha38FT$W zmbB|DAIUSV1a3y*jO-ZSF^2JnduU{>vSs~D$@KQ7?9}0NQ%GikKm@>9L_9?+s1FqT zxVnV0Z*Yc6>rlHli{z(0R^(JZhXyy~SLV@yb5|f3#b+vZLW$ee*c;@=4pVqbNi{$k zISbTcL!OTa3!*wi)V6k;t7rBa8$yr{@>UsA)5AXlX4=u`FbpYX)Ed%1;g#WA=mlhx zetg)>DD6*7*tGNT!MSM=9riwnQ9)jIl(smX++~7A9vH>7Axb)M1H|q0F=;UA^RFwx z?tHz|@f#B1cx0{i?_heYA1_@}`R+-)c>I*{8Nbgi866HBuUm&7>zHrzx&c~_9TU+G zOmUVGVKpdI|75dAE*ro#L-3Cimbg*4&<`4pu%qB4BYV*_>>JKIAbfw!x~7tI&lctt z-qqRxP^1fY=&&h@m*nfNFhsKmW7r}KgL^-*X|c+_JzP2x22j*zS5`XwYz{j46_2F) ze|D9dgg&^GCK&Ta-KWUG&mfzBB>Nt6nl7t^`6ntgxXX$);@67rc=QIyw{mr(4IjUp@k2?J*sJdwb-+@*7)s0OZ+E=!DHH+7Vwu_`*e|{pzcslky7qicy#HZRR_r@;VjDos!@W8gH~pr2KJqwULcPj;6iFx%-qE@M=L|6sn;RAeK%#GO{oBHo8 zaf73X7?I(J-t!Znfs8z*o}XPd1izA1W*UAQoca`PkZc*T`#Cg=Ho&y#7X=IbfwM2a zFV|G~R=)hM6ulH7B7HKzIy>KdpgPlT<~qK}*CmmVGXn02>1YHG(uY7rAB?vI_hi`E z7a_`e+0c=-pdlb|EPJ8RQYdXe+Ht73j3q)B=EI%pKvxxYa1ja!|1=36a-=OwYXxnH zG1mAHBq+gvG1HFF*S>(cLlOlhyMoQV`=S6}0({T z83G*5j;Hb`HVeP){Bg#|KnD_oOoS^nK8L|_<0N@VG)&4RGZ{u4p&jnTiP4NZ*69XS zpK{)%EaY$ht=u2TG;|sFUrIZ$GQ`TR+8EIQ-ZgODWtA3bMEWTr=ejM zcIHrTi%m|eiA$RGF+#W|?UY=$1k|<+a=W~Vop7U3A}pSu6eAQ%_@{$g#(;L3Oqd_B z8BP|4qvV=2b1x^q)7$^DzrR$Fe3D{(06?&v6DJ)Ms2X7K!XQa4?UFz7AdGUnzzgTF znDYZj1>KRo*^?VY4_K0EdK2%&qXjbzTAn}a z3OZ%wc>N|e91WOB1UxQeJjG=!NWyjvyZR#Qbmfm8)=FF0%<|qRGfPePtv^o)ms8JC z^$0iY5UdT;4AuB&S!cuTf+Eu#-##hjg$mp{k_(6R4=jDc2s4YPTSPE6`qdRd?11N8 z6ljwUhDM%B9{0-?-+!2r(_OLvQA~J|2VgyM=PQp~NO;MpB`gjv#gFRRMWzswy;jP9 zB~I{X%s=vC4mA(Z$e=_qB&J9M>H9)c*0`?W6WFA$UYtp&yX|_D9b4UJ z4?x-bLV<%9L&CFpUm?w;f)WGk=)L}L*xH|9#p($UmH2PHWkEsc_iZin(EiD9*A}75 
z13A4c><7o32Wdr+S<#IZ!n~2*_#~H1f+d~m0?0@?U$P8>_WV?bk;KazVD?>Dr9`&< z?pe^;W9}exnd+O-OjwCUSoo}CCkNW-_ll1&s%!+oVGuZ5b!LtGs+q!(&RDMj$@8g( zrNpHJ<7|N=7ySQv-CRBmEa#%3qdd2x@ZQChFuOzZS?#URXWMeae`z5E{V6oR{B}bt zX$(Q!IgOl8fn(xy`j4uDDh7j7>@D2@n{(Ek#S{&p?B|%5@Y>!9Z93M`Q<*KVV50?? zQk=3xlUfQsz3p$RsM4kbx?!X;n3IWn>70^z+FuN<=6XZ_ys9*ROGdsCK;K4gjD;sd?|mZOv0bExPl8h4W89acJu(ET`&T&W4HNQE;oyi16g0 z^I;6i1PA%lsgPkE7%V*tveNT__{mp=z!VDije{ukUkt6|&B?w66X)kS5?gw>Egg^z z8_a5r>U7Tp@!PS8*_@_qC7#10F&R%T107f!cwK!|hoq zj?k>Cp8wbgG2yeT8_2urW?X}-uRE(+5Ean3cRIUE-) zsBP-1bX9Ki3`P2g&509*0e&hg{YZRDzNZI_m65e@vdjIMoP5Y3#-J8qBOY)y`6F|j zKQ74m`yvH4Ds-0&tb-PgQV}8PDb+b53=F@z2BuNa2V=7b!>2KrUX=#>vK=-W{}LR6 zMli9-szIF7Huo{HCC)pFif1Nn!#>Pe+WAP(`btQ_&MoLCsiNK33=?YzFK%J_^ck=c zcoR>SD|hEKU}gpN-e^clg%|ZYvQx=y*wN4<9?%?`_D2fx&qReWRH-bb3K0$ z2+t1$J5MyJ$@Kp*_TBMRzy1HnsT_SuWi-(=lU-)C?9H)P$fm4}%sy=^+4E%YmA#@u zk&zuGkv*~{>-V}y^SpdFM@HoSx*efk#LJ&4)8-d(-;t zL+V-t)x#zN+Da!91h=T2n|{0ElPkly)gYmHC9oMrSVM0Z_|IUhS+n%=N3l3&0YEO^9jk(KEAbo>+DrK`5V5&6#ty>nanOtZYD!A?}x}A3Nf3QjR zLuC@Qo94T$6MYvEwnsLQLm*Txy-ABUqm zAJI!+v)IC{pi_9;Vf`A~{=BFm!Q2cH1;&(apmB$=?o|cnk#GWbFIDRfXj(=rT0m49 zIXaQoic?p;jir#RFc{HB%Yb>?>f3OeHCRj&DmAl+N|{YNG8J_orAv3DJJD<6c3Tw^ zP@j!Sz2fM9HJaJYVo4?;vLttcf<9p>p3QMw#8iAiUE1;AO!kV~gYg7s1L)MZB&8?V z9*KcMD>yq#6Y5jx#;)BWhjfQM&vegmx;W3zg|r)LN6|QYgRtH?iNOOd>0y)h6q5`e z5#WSVI;|358KE`gISHnBMvI)44?6qQ$UDDYK*%zW(Hxw?m%P(|sb%^$5P5EjilPI{ z51Kyd$8!rQp=&=GMd0*nTv)F+e?ut!XriFw+kqJlHtLW3cYZhg@lPdC{(D;TD;gU= zhDaZF6I>kY1gv`$J%8X>p~m{JSj2k3B65B*`V-wHWW4>_|D=9wip~{czt1OLQsK#e zbVz=0C2am}<2ZXIhPo%Er)>WG%wMvY#iOhML_}(xaK_yJjSPUmuRf-3YyZ4n^S=5| z=GJM~e`ddcdzWSBbpNwH3B7Ot*xuo7{~_FECFYi;uc-q#L1{-X5Y#|zmJLJ99~0Le zDIr%Ld^r!M(ZN*AoS2BQJm=Sw7ypK2cC@`AXX<0seM^A~>-*+sA14s)_(33g#YMOx z@)}6=<{)qfaE_>VfC?BXX^7x!2Qy_OgdVD}F0^ajhYU^Kgz8CIh0vi8;I?|hKAAWC zSLmtxdrBP}url-^Sn~?+c+^w)k~{pDgsA+CyDYsvXa}I;z8>X@C6=HCln2$u@B!oz z{o|JY(^?Zs+V1PX!p66L2T)HNlQ#Y#3;jP9${R9w9fTAdTdE!V_2HDK5&Y^VzR`#O z6QN5FZ#c>6#BK6JT!~m^>>3;&Ie%UC$Jb<6X z5RfDRz%-|<^-6g{8skF2Hy 
zuGheYgy82Ql%*sN{{M52-Oj-3C~=U3{bY5)P44F#-F#%=ly~GmU%PBF+-LyfIjZ0G zQ1-EM$^Ukh-(NUzRADJ36j!RiKXljFqr<%Uq<%dA{}!m}UjV5veaFX}@qff=F2Vtj zR{i7m{H)6W*sl&Z`d(aa*ZQh*f4^7f;vfI|Pjb)Zqg37V>Ityk>wGQ#zOTE)`!+-H z&qw!jLHW|4wNUMR#sB+ffjs|@M#azN{~yi=A|%&KYiauJarFO%?*hO6=LoH|Jy14h666}@y zPuv*-Q(Ah{^4ch>fZU=tRNq6#ACFk{Bs^61!}Y%p@)wVli~fIZwp;`r>CKn18-U$& zn4!&P(;pwO@whjl-}l{F$aevVQw!Z+xwFAkw0S`N?gs5{b9KS0({@A7L$p4`UuUBE zi&S^~WE#=t6Yu-y`>G~Jc=+=7HJ|qmLya(v4gXH~_9nRW>p~`zBwS6U4_*jRJ@S`9 zDSF5$Lx>+qcOejZ7tUc?u-( z)ZZyJWod*fG|V%DGTHl3yGS;WIQS11JLE)=XB`+ZSPXshOV0QGMgNB#^dmNs!HD{I zayk5{m}vI*2!ginukiu}6WMuGOFAut@Bi!Mfgk_e=0D%%<}TC+X$6ftZk)e`MnAHi z@9nyuvEt{svGGfUUL!xf>~0Mto%U74?N@f~#$x+#ghJrcAZmGrCC#m0KNa{i(r!$@ zulQf1#rk~)qWhG%5w=0Py-1q*`|19lp3VB@lHGLAK`v#THh*ii#$pPn{|v~#J~OaM zg*%mR(%7UOJ|LC#C|`SegKkp3ZjGk;9M!oczZKRUI!+AJv47{m*Q8*a>Wih>B zCL)Q?dBExCcHR8SeTbBGlhU}cd-0+Op4xQqLcX|yo7P`1UT7Ax@9zWgqPE-aqi#YD zXpK@YgvkApl=w=!*?f<1Kf}YlGJ<12l4Nu(NMb@mRuslu@|5^LhuIJ#oCz=g;mZgw z;4PKTzuP|}S&QX^&A%GF0bWnlE#tUFb75O^J#-)Z2lkC;{ruJ-s&>5Wgi_kf`9kB?mc{eQd? 
zbSbYc;W;6Y?muGiSFFZ!5)b`-NN&86_2qpL-RRpoJk$3lLUxMq^5#fuJ8d`Jn}y9@ zz~)X~f3m;qQuuxx6Kv_>?&9+c;q!k!Cn4HzM4o@A^U`Aun;+B zA4r(|{t4TEUh?L3RKUh09k|RuoGjvo`|HO4*Qes?(Vl+LB^xX)8Oi?+I;_=q2(8_C z$`9%6WZf3L$d(cJ@7Tb`j6-%d06#d`98l%c4_lj$=VU^fQojSZW6}j zoiN%ooEdD$KOqtLIpfK_7Ume_C<0AvJut(tndb$($j4*s=kKidzsZpFyRHf4buz4i#)ofQR~+v)z$BP~N8ELn{8PO?*${?l2Ryc@e<4EEg#s-E+ z9ugUTy<;i-ya$P{Mete__ES26j;fRc#m&b_g6RQyZd%nb$|aPxUm5`9Z|Ds(|88%d zfqi_?+64X6`ZjP&hFOoV05nB0hf}rAm^=kMXdVnpyP&1056%6EaTqkWLBqTY&`sgb znw4=l-W3?CCeAD@fp%gHeucTJYS!iW>HS<5?>j=zSXv4{2Cjr!O9NT zZ!n!`Jt`EleIL{V>?E@lMj<-_esfz9q9EL}v3^A>csNk`K4fVSUl zq>w2MV*;rPzAQW2o4xnxwxZ{20fw{1(3Xz43b5`m;!gb&@ds%0plNB~`m-3M)6+b7yqD%qmw)boTK@re9 z+DRhi@b6*$SYo5)4`<8EMofb($*U?K8k*SAR+T&PrC54d`HV{+mJrG!MDOuQDRxXS zbVXC*d1j!Gwr<5o7^pQ`cj6L9;K}uxV9=kZagzEw2CK}JIvEK5b=YW`8({snPwUB+^$qN?=+^zQLr2esI zc}VotLrx^rFn9{75VfG7LjJ^!dT{|-xp$j5Fk>$nL(g8nGR8W)f|{xD@_kC@+SH%4 zN2Hxkiwrm=Q#1+_D~THn=0#N@j@Mx*K>7LM7Hs)q@59sbDSiA$i7c$+itfzAT#;8e zU2QkqRpIP~3OBVU(!%r10#g$U;E{0K>VQXF^Q-k~yToq_VM`eGm`C@?KGnr#~mX`Xoo;SYQ{@W=L z7vkmZMkgv^4j+blKBK^*u9c==Xfxbo_O4(vp<;r_qwZe5_We=ysJs0Y={Aod1|qay z#l_DifC%G`glNuOdN&qrXPIVw{zZU2+^12lcBd}hANv6TFkldPFqYl`)HT-oQvl}} zo*PbbYmi*rQ8V|BB75VkmTiG(&y%0P?`i{Pi0g-^%5~eqRFoaIxt}uh_e4_Gvgkc3z>1q`ih@4wsnc^+j7lr8W*$aR z+N=Q{ohqmFWfhFUOaRtnEgdT`gJ*QE~|H^q_=;yrXyaD)d?aw>ia zvHEsavdN>9Fiq)%ht_k#RN7&-2wbm~q-|y49F1NVL@9p=uRzQH)z2gZ4*W_cp@u&b z3wqyjnADn#cjfOY&4sb=i)jjdq~FcAf^f+Z&PQBxVBeOAh3lC$z#{xJ7JO-0}eNm#Jm_d;C-}79dVeC*=KDY#P~`mmiZ5L`9K;XMnp zR!PVH1l*FsVGD=nlE}H}@HVg4mfA1I?tUx+Gieh*$4#ogx#5x)Zr>zEx5yHtB4jN& zV*sec_@!ire4Pp+1;;C8HxH*A&Ru{=*a?H-F`(Jc2%KJ-m&#Bst;$(YZ?jyLk9gry zE#?A+S`>juB^r%SeLUv)d!9~SOtA9%1fk}#PJj|F3u$MoBr*To>DT{0)o4! zELmw02H@>i)C~*_29Y%zk! z@Jy*`iCVZc}{He5_1ml!rdDCP@E4YdYbY0jGnaqj96wsgy)z4 zGqgGsDG38g602ocVP@caUmiqCyX-HGtv>^8^vOxcP&)x=-{+))?NkN!!_6QC!Zbfyz+8tC-9#=k$bP-Inj6zDhhjYrc|XK@*OR}K?N}X6=t3? 
zWg3ub*BJp=dEPW=AuAGgVVHC8q7)sgHYdCq8Wft?{#t|lu-+!f7I*Ts>5(}ZVe9EBwUdD|E5+ay716`ex>q7&Vg*rq6G_>>Kt(rgV#UD4 z5G>7<>+lajz}v#X$grvI1kO*6$pNe68u0Q^sx} zMFlZJp_nqw@U_*{E7VCY?1fgfG^*$E1(e`y}dlJsLMZ>Nz+@HLXC!A9VwISTMqmEl}m^i96pXMER7O6Y&&%SvXhZ zYQ}FX{pMv06%e)j zJ?341asC-X;L><=mkS88*a2mwzl1$h{l$n6wPBJem~$Cwb;_c*13-Aj5 zdBH@F>mgguX}2!C2KL1L^8AC@@B(wc@FKJKZHKP8eX-KQnn8V|mAzQgXFFF2sF^m^ z+_%d8s_fNT9IDy3tsG{D-p)hTTHHp_&ojoWvj-))+fk$cN!V-I?Csa~|40ZoJ5j$T zr0~N@+G_|q3R*V;pC#*~dqP0I{v_QUn|P56-H;MVn`eon@X{Sw>p(*IuvbV5+s0ar zbCDi=;Z>v{&hYis+!gQpMcR%z=H9KuEY!yfOd(X{Jb3V+-25tILDd0MWKhXrFy$=e zQ@sMmKYL)ap2ys`*TD?sm9Xo!(Nm<}8yG?sbEF~;JBxP~?AtSpc+_NmkkuU#mf!5y zqlL3bhL`@2`6hFT5HQG`&wK+UXa{5{Uy8${hybsX^SBiF3-y9D=g2t=KrrBQ%t)#& ztdaz!>6^f{n+I_RMX_BRpWx`+^^{lJ_LN3Jqq2*^YJ=-)jAnP9f;7*G*`lWyeXu=Z zp5<23g%dT0sbe0(q_Q#7h;>1Yn;7c29Yv#%mlpIdgz>L1Lw`4Fe``fuI&}>$K;5?| z7L2IaEq&y@+>LFy(-iZ-HB}Aj@GGyMjH47m9mHDF8VNFoK0_T9L$((m3jEDjJ0(6lO|8Q0^C_9x-#b+@S5vsSBc;~80RF0rWH=(iOfRT(W55q0MHg?jXZaluu1(IpI-(iJw4%_GvRefm7;jfjUWfYqg#)2Sd*z+u_2 zRBj1q4VPk0oJnl>^m57y-M~dTsFvuvm8uZ(RDVy;fz6ooBRc)^wuIE&k!lTC6y)Z# ze^^h2<7J{I>|S$8rF0=iM?F>x`e5nCx0%~LS?*^A)YS9Oya~reLAPrh(Kx0kTkgVs z=X7zYq)7zY)EffAjCrD9^fgcXrxe9l9rKa-Zkez5XIpjT7wb>3bMTa?dI-kBFHye`5>GZvtMSatt9P(9D@Vlz z>;d5=|Cl|}G=0SO8=Rwv0v^b1Sq+wOTD5kcpv7Y|;H%Z{GNQZF@Nqvd_{lZgwpyc^ zhv9j~Kf1`T@(JetENRH%g@RZ#$L`X&OltH)!)cHwgB>CI<6BE@N`RE41~J8U$XKDr zm)&G83;tOLkO)mtQsl>P32Cz}(JcU#$u-wp0X%r=2RQiipbV-UO$4Kr;i8c*V5q<^ z&{g3^xCKde*?oQ_(g}E71MGAy?xdZjuvwPgI8u%o?^Tb#1wFR6I|aY?*i%MS%VZyV zl@lF(R3d#@BV8`$&X)%qnCQiHW+_69mvL?(fEGJYi;!6kG7aQ=wJep1Mm5Y7IrrS>BPKTzp`Rh$DA=BvQd+7n4ZG%jDCB;3rB z1Y<_svBr065N7JTYkf}Tzi*Pr^K_7gI?jGF5_d;IND)wv?158`Y`Ly3BP)<6`i067 zD9fL}^B6)Zoo1`Ju^7pIX;4M@WTkc5JhNhJ*=WO{-qqZry@1*_fwxh>aMl_C?SU+l zbvLRtu{R+iu_O&Yo;CUeo9m>3QEPI5ahx!wc-UiLrd3%x?S+s!b0!x~Mc5vj=^3%~ zF6hOf`95K-jRfaB%5Zd>|8^^%q!Y6EEnd^A5rNOnHJt1WdvcV^x7QMR_S9+Q3FhFl z5nmn!nm3gh2E4M@WKDEr;+KchDh8V%Z`SGrH1n6xr 
z0NkllgTssZ#DKcxwnYfzOQ(7n)=)u(ltqvd%!So!r`N;!ZAhT)>N47^3aezfV9~V&c8SlyhZd>5gXRz@ks7J2F%<1_Oh^YOLYG z_fTM5K~7|OW)6kvc`J%&(|>!dHSRFOb$9PJ+z0EkS*H)7s3C{QqYrKb{8VQDt1S3_ zeFkjLGnMZ#lIHO5m&5`<5M$D~$Ta-UB7&VacNgXpv8 zCb5SGuBqTQn#?JXYE(KJ4k?ph8~>IX{$+0GZnYQK)2G?fS7(9I+XYVDHC}P_)x{9M zRDn)SG`HTa2g}hMgB8I%F7r#qEc9nHKxo4_{qmW2Uyhw#5kKTO^n|j_#Kon{yZR3ByyWD^?XK)O5b(?X(IP|{K{Q?<;eRfSCBQ>Qe$;A?l6o_m= zW>frd$~rF>kne;w5cJ&nl?+W1z#VE(KhUbM+4E;Vkbv+EVbk9$IISGoBZH~Lb{)ER z-$8n(dg3F9VT=JAIS18`HM!B^^fC3}ki)u*bb(7BLQI=3dU1EY;u7$D3Cbj00B|;P z|x1WjZ|CaVlW5yC_=+byO-zjt)+f`twf<64? zYX^_nOZynLn%n@J#C(XI@!YDEgJEXbVncl}Dv4nrNHJ`PM0B;=TCTp>FfLQdSe24D z|B~ztSE4{fij>q;n>_QpcU5Qx1s!MabAEGhD%f;K^J`Mf6J%d6*?MsmdAQ&7VtcDt ztyyG~<-TQG=lX1lHH^nP9vO!M&C6PQZh-G8kvZfDeX^m8@R0i|tSl&MK7k0Br#7O^ z_Eu}{gofkD3H1PeV2yF6Eq8$TnG?`=d)tvsXq8XtA^K|*$U?J)Viua3G)|BTqdY6? z$%VV2^`$LAU-)6w%#4e!pc>B>)v6KSa*!P2MR_;jzi}Pk;R$5sgqSo2f)i0Osc?HG zj+lq^Dl#dTPgqRt5Zt+^cJwPa_98FQ#l$DGPb~n$Y6ly5D>blVE{p*?LY(o-8KKKe z6eCkXSyCzZ2(UDYuQr_kDabrX=ewXz(MOC|h7L=R;S(DVsJ$Om9Ws)YXXP1Uk|C6a zYIbzmb(y53nWg2nA-Y4@#3x|AYH@nP)+&ksEg8&c1Dtw?EKQ-s=SAbhQvil+V~{i2 zWHY{M=9!$lAnc5cvLWMcTNgul5W$!Szf>KHGut5tg=p}ha zrY|jC#Q0>+&gbtodja7C{j!9LSr2kU<;TvBl@iU&Ztu(-JvIN0yd*{^1;4@%JRfs7 z?xuRNH71ayEc16&AWCJbUqG-WkW1#_oabX|0LMVpc()Q5W`zuNK@RvRIM>Yk2B={) z=(TFJ)m()^u~*RB5k2hrmP4+G$VjksR2|2r7Ftr-+tKN zUBj5A9pWwFywmW&!FJe#FzkV&*#8T)^9Fo~){EfvW+&mtllk5SS=?(6f^aj+iw{Ck zDmq&r{me&6L+*CQ+KlG2jCyin#Z&j`eJnlNOyn6t3=obos0B8f^|bngE9ZG3;D58PA4DOHRHcRu>Ep(8-r*AjI(19uSH%~V8vh$kC1Xn?m2-_5nRab_^B{pZlxM)G@cyHsj~WhJI)KuKeN1pEH0MRwp$ zew#cvcu{%>P~B#Q@0S@T(qYJb(Rf>04-)BgScE*H;kJ2ab zgmX|*pvKT8gb>`_?H@h1Cu05Zf0a^>!YVd$y6c`tKY(LR*7Y^-F^nnTmm7MLm_? 
z-ccvS{#8==ZTPENB@in z$f*J|DBfLdh}2ZHfqdaUa@^&qlgQZeAni>ah=ZyCpR|OM^h6Z3OoWOYP(G2r;C(S3 z8duohJng%2m=kC2zBq7p4&DE5P}621I`ilV`pb6zvT|rQ9+ZY<5HA<{PVuPe?{i(e$!KyP+>7~0Wqb34~k3T4eCi}3@AMQ zx!5J>){VTkN(;aE}=>qp5SA9zVVZhC(99h@646624LqItVH z;7Rfz)w24PfTfCiYpy^%GEUN0#oI4uJPlCEtJL1C9MH^<^bCk)&* zP6!x1e}0?v7Ue&s8ijSA8VQpSGA!{j8o2I`ir+oSAY=zK=S(59;aS$Z9gw6BhUsTn z2HsY^VcTA*$DF*hvr+Izi${nWlenpi@j$2tCAzTs&b35_+cb&Ai?|gj6+vxZaN=$t z%uv%BM;I#~c=L|XYYU5Gxeq}{W@d2&4q=?ZD~O1wFe?{!P-4^wX#{q3fJlE~Gxk0Z z7r8Jhw;38Cv0_ajO=BsYm|^Qy**0h20mzxqS`h55Gp;O#u7cVXp~w-b{5Zi#-ar|q zGa%Z>XBXNmcwOmLI3qUc8r-aJM@`{@Dgc69Itt7C(fjR18%$^+L1Jo`ueu04G$Uu5 z+g1n>v?Om0l#>Q?T^L31CB*wq{kY}tr%fCkzO@Ea_>ECttUf}S4E1*nf%43iCixks z3;l%meiy_P@F34KwntoZn2BM6347oGj2;uDO_WkWZKRTsiHTz%BQcRR?XxY+hQqqB z>ibC(G+In_KpU77qY|&dSlkSvE?sCI3R-qigS2TjqTprCiV>cGM1DA1(WwY`u;qP= z;Ub7>k;a`veZf+=ivjM5wbg}Z@r!BvE(e?UT2^@-G(>t(ZP7I;4iPMU(7U>d1m~c+ zkx=W$rI}Hj`6iot;Mj%2I$au0eT3b9fP?56UCLzx4VMS~7{|QS-d{2oIcn1{)AQgx+0s}ZgS!|{p8NNy3 zOdOM_h_kky7nl6$>V^OjcMA)j*Dd1KR*vutrBbd8ySya5%F_Ye{V_Om@?kpqb~*K0 zybFMdUdQVk$0%QQ+F@2yUd#Mp=B8?&(c9?TPtQkv3rxqSnzST7vQbTOW$U#uvr{{F%Z=7?N46t!&Mx2%JccUvZ`2{OZ)UqE;c@WkNku$FQ^n@$ zm&+E_2QHX6BJs2@rX#K+xgCY+6t#!E=k5!DI3m5!bz&TJpYw^_*EXW>y`u5}Wi8`H zdrmXk!r`E$n|TpmzRkS5F@n?$-V=-d4hA6G&Vp|Zn_cXL4%blXfDN=;I)U?k6~?+{ zC`pdT(_xC~2`(}*AWAN@6#7%``wcJg2sD9ESs~^8fFTzLCJrGyuhxeTd(NSW8Ir{X z6H*cdvL{5=Z!42{(dUzfWHF?00$MB||j$?-Y2+K~n8OeVY!afBhJj~`hxi`ctnMsxaAT2IhyS~07K$=lJw$`$w7T- z7mNe0ToM@H#q*i0`a+6dWvXqIEkDfVA1{RSSdT$Um6jbHf|GfiGEJW-o)Vc939ztt zTF!GyNS0i(Zv}cA$SZfIg~mXhEXw1ktt;1Q5zl>}V1_7DXa~5LoGVdPY5cG?xIQ!> zTm)n^ag0qCfUIO~O==GV@^sW$T-;A&3=Im>>EDuD+0?y;Zr?av91$yJ>@^ET|L1JVd#0rS%jx?I0RAIA;%= zIkbw`W=M80H;yO1Ffn1A6O3$gO-cuB8f?}$S$IZlV^$zsOZE?@+efv2m1pwJ>TXxm za!?fM4G(^5wMtccNP7I-;ChItfN?IVg@_nRE8RzS<68-c08gqHi;J>e!l@7h=3JWA zbnU!TD0`^~;WGs3-)?LCc2WFuarWmW1h@1V%Dcdo29zDJQW_n`-I+-(6dUU z@C?s9;HW?Qf;{$#+iuJkda?_Iv}$6;DQS7^8BC=VA83{BL}gqWRpxj?aH^dkz!>hv zfEl2tPvWi4haMbn3V}%g59VOnSoUB<4gKT*wE8(eoW_JGDK4L;e`S5inB9aT55Zzo 
z4@(dRVu_0;pOZ>esG*VLYxcxX(0H48e;1*nhKsB40ePHswy9G{pe7#TiBp-MZ4^a$ zJe}>U;^?DKAERY7ZfcN^8W^C(WD3)ZI0ja^j3e5IMPH@rh-1Do>{E=iQX+foq0D8G zCY?VVAExHZ%_&{C_%e&})RRRI8k0i@nvo7EObx^L+Sr5GLolvY!8hF7R?1p+qR=K2 z4e<0H$nf-&)9ovO0;zuFMyNXKuv8`6T5^4)5KVQ8F1yO~IQ@JYYm%EF<}xUyHnV_& zrs3RcX!oB@;T>5Kv1Zd;d};IHoR@_4`HX|(XF83J&8TVFCV9W)k&}Dmu$m$ne=5rQ ztU~cRT2)*K-&tHBp1a_-b3LJgcOy)klj`Vu2Q?LzAOZCtuT$T@;x{&TFvNxL$u8TH zHcd}%m+E4(ckD}W4zEx?99-Z#(-PR}sqEXY1H&CV>N@-`{G3!3;g+qz+|I?a#*UtM z1jDnAev{GDD7)!^r%p;82nc#^)iQ9xlI`;phw^8#7Fj)|#hYD5{Dre}E0XV923bcR zwx&7eR@T4hG+kcg-kFk7sjy4nj}8y4%b(pYvJE$`aqqY5&Ij(uNqt(Qg={O6I3AZrZ#v|eJ}@*Pz*6T!p>V1SCl`X z-Vz!kK?5i>>lsWDSaTB!pUlrmRf8{jsyIF_aE^xCELg7@DtrZX6NTF4(Sx3ju`e^8 zt%l~E66h87A#FTUI432a#+e}HLpRhE_i*MOxxmQLETXiW#A?bR^^d~&P|GrA)p%B` zO|ONTrn7m8Q67jX(@nRYvxW)9$qdN=RB^{UMQ4X9f8*{)&CjF?F%TUIV`&x(@NEl4^`nell#JXX;jQ|a@MDXY^e>=n?U>})Y~ zc%|*0=ve9)F&KK+(Q*vT*g0glu6k)#B`Yt8Wk{5^&E4~Y@#~4U!}H&30VpQoScJYG z#XpCF_X?n2ewA{{>CNPN3_E%p-(Pcg$NpY^k2wVWg8uy110TSbpn0I3@%;LXBjg4~ z)RX31cNw7U>|HR&kt8eh$f)Uj5z2u7*O1d2Lge~cmt>9M>oBZWZUJ^3C{uHwUPu$CRXuO_@h+CO8Ye>-jkcfjTX^m*l>{(Cm0EgM%D#RB_z~}ZC7o2QhvGhQ23>#& z{$K})5!Es3k<|}SWW`rI4RNIh)cA{7s}d$5u@mEN)0>?Dq1LI%a2a(rhNfb=lnFJx z%vg7I4I`Q5WKYEud$No-_h?u{eHu5(0 zEZT+@^*4;xWY#V!)w&UyVAUS3>|}C`?OEsJC?amIk6d3kF#nqDG-CP8Yq@6$uUEIf zbd^(@M;g2wh6OYnJ|8i~G>*(A2OQ2-Zp=wfuwZuqJ$$Jz+mO=RUX?V5WH!2iR_CTs z`HVRv#W#DFrf%2biGjX-?pHvN^5;7I{?$+5kQ)t@fXhaYHrT1|NB1-)pTim@0e*H+ zs}!pIbO$O>%f}Wqjwq$?Z#vHS%)=aD_CY5_hUu6lFS{SZ;-`2IU;>RWXVN~$>t*z_ z_bZUKL4XMkuAKV{g0884g3xB76Os~^Q(}o1lV$b%_7M!?7=_)E={^*Y>m&&s!<l@AScDO}8oC8bjH3Z8{2=}ojP@8G%$nFRC7ZJ_@4RseuR>hc6*NKm*`b0bfo^^_U?yQmeeWpio zZNq~%L7B)UJE4vb+JUH_n4;0nXk-|E{c?a!28r4eNnyRL$hfV( zmFd;<7AYsI(oal$@OZx`W3=N-c)NfyLF_yuO&9sU`}pj-nB34*;aai3e;y>5bP-JD zUm$0Z5udS+Zk>jCg_q~0*|GsTBknAAOq9uqf|og&gHov$7%2ov%W4*KN+SzalbwP^eJd08`4V)A$?!hEJk^coySc{wk!(Vob;i(l&9`(SHHx}94~b}8hPYH) z-%2*7AU04;pj~fSUb$JkPT|3zW2{m7N?YF{jkp*gYG0T{iWT%bZ`Mrxg9fv-0@JKYyFRlsPKQ)t+w@!n=7F>tV3fg 
zR8lXjtEpHtg8h{%_#knR5GQqv79!2>DZ$Sv-QdAI9KPr1q%>~1R zYIR4Fc`J6quSZ_MImW}IKXT~v*BAhHN1A+vG?Blb)KAP^Lv5TDhwJk^JeUo$;7A3H zl(X|N^Wv$WyNjE4f?7IRF{Ydi6M-x2;n1x;mG2q!s=7q3-_aEYMRYd%?A4*`Fowu6 zj2Qu<))9j+$H;{jb0SG=gZcSD#4YmY1Nr{dcWSLYI11K>?Je9WHt5ydm6SDG5&BDJ zMdF`3jeW-iC}x9=foVW(O?iVG(}1MRpV&@A3n5np@BdMp!qfj0!7x}uabl8>x1|vz zu@bpmu{Fuffc|VJPz4W5)Ii#p@vL5=RG6`KbmR?3gC`ZLBPCNNEB9LQ+XL4j;y|9b zic+1}_OCF?nx9m(La;9X(?%yT4=QVrt`d6p(A!aGCL0F>Dz>0jk}d6NJ)c_e+G(?A zo#w~NZL6>_ZEojz(~h2t<=gJscv_zPn*vVrIWVp`J=_G*E5Igl_+rN1{Lxa!IV*{i zf0A*4i{k%Fw*X=kh)LWIKvu=;KcWpTzZzyfP>jcE9)M2Y7po8~f(VAN1ij%((7D&^Q_iKprZ6u>mH(KodfXB1Le5#cc$ruCtav#g?t*lFaiCh-HDbkZ)jo}jbFV9 z>f+qcHf>%RMwsu+DX9K(_DY|9BEATCnw#L33nVv3aLv(@n z9&`y(VU#H+$IAdZ7f+DBOYmzs`NhZFeKn+6IvY>`&!_A>qtXsgPn$seuxu+y@)4_3 zD?f1o26f|rUd{%NMF#>o0dXb|$V83g`a^S%q8ug7VFK-E62)#H-c;Ci-{U^{#pG(O zDvyNs(+pDWb7wyuw(F?Kal|h!XFy+QY7aUx0EOUQWebbIu$J;`(ZFkP%!!yrM^Gho z;mC3OI{Z-_ryVjS+qLa}WXy)y79* z($9@KqX|Oo2xYNuNvAb}Ym68XHPfDXS9^?|A~Aus6_iVlngaXf+^P{YhNLI`aQhS+ z2T-S2YZb-R@N$u~qhMcljj#9Ku@dr!gZqIn{FMC@9AbxLByQk)8e#o37&n;mA*d># zQ+uH6JeX=9ucJW6_F`ARa%KooCZ}Z!QrnE!1EvCH^(?YLn!G8q$)xjX%Yc}#P0+n^u7w;#qcvGvNMuSrx8#{as?@a8c!H7&P1&( z(>3?Unxe)mA}%5OPnnF2*yrItqIfH_(9Gt`+&t-k=IL&B?6^Hk7+L$q=A*ZAhl~GU zy^G>FMQv*9=PT92^FXU(OACx|4WBy6PNhAQW(N9}2Tk`jt#guq3Qf^_ z?zH=Dl!!AV%C5-a@8hhRuE(Dm>sg1ICpQRh@4E{>;;G!Ye~1%BhZk3D^^#{xWRmmmF~hF8pfDY z7bpk)y3ddwid!|Q=ee)yGW6($YIcE^1$aytYmM{*SmX<_!LUkLP`i%@*IpWlQ2-}Q zl{QKooIcm%wx#8Sp@J(~HSCyXfz_{l2s_8qJ}qDg-PzvoD*n|1=u4#nSUKLgs6F(f z2nkcBVv0_URtATU<`MO;L@oc7v4P$VSvKTpM{db~7bLh%Ihp-jB~|9W&(~VYju>{e zT#1T{*zmXF65_Qvl^!Y3S~k|ACJifNSztm$t)<((3Z)&fTT|CL+^0*3oAlF;AGvVp z(xvC`)^<{q#8JFo4eE7?ReEXTN)db(xZn$tBuUsv@w5Nf6acUY!x&`x1krbJowbTS z9?@vX1C^>{fSJ}r#QXlF^5bcpiEh!16eo_O-Y$t@_b!FO$OXT`!4^gv=vBG4J@DAt zV8A}Lr-PD|(bHBoMHkIGvJ~1&*P*51rdr)H6!-*Mv4rv(8c2}tL_p`aUi@yT2-D*T ziUqXj3hNJIrsGri6pC>8W+%6<(xxa1K(|}P_olaVQkK?(JE#epLc8bjU%(<{R6Xmb z$}fMokAR7BI@xN@_c<3e24RF7a;rT=5mymbD>oc;4)y$b?D}1r$MZ@u`rqk2*8u{Yn{|*GE=K#3c 
zIOw{y(_uz87QH7FR8-6=nY{@iK}vKhB|EWfmO0qSAd15ggz?L=zBAwoc*TM#n34W8 z8yRbyQB^ZCn}TUZ2-_~I-S<)nw)~QgY&?V?jXQTy690i`ZkI0hx-b%F%l9vMv^Af;)x z&*FjDA)SNZ*W%YPA~>-fzGO_%tyRFj-ph9?X$4AGlf!&=oEWXEBlZBiJJmemp0vQH zpsAJ4(1F4`G<7s1L9NelycrtAkz76>cUZVzPqHm2-T9$OzSh{A$sDSSR+hd%hVs+7 zI!6Y)x}QQ1ksFzenao7i8@>tF1qpfMfD$z58;41IPOwzK4%IyqfO`k5yJVn-#!nj@ zNXh=H{)N8Nj-QoO)f=ItO?3P5^rCrynuXz-;T_d?^dO1PL0Q71!ds4pW941KVK9wT zRoyRx(B+UpQ4d`Y7;y@=Eo$vQvHgUtZ9b3nK!H+#Ka%u35pnl=bnBq}t&lcYIIPLu z?bp}4Hx!Az0g5l&iGpV7>&#M^sjc6z*#XJbU`!VSz0BwfxdNJ<36Ha$f^7*HOtM(8JuT#M+ZY!Yzb4 zLnW4VbpOKb;4{b|pem7|2uw%10MU4Oh>H(gx?;18S}$YFMx95I=Pk%^1P3ity%a}w z5{|Btl6`-Oik3^cac=CgjYKz*1)&QhBJFn5ix5@;X@rNAA3PuGkI^Hnv4o~BgL*%V zD>UQ-BkVdEhTo50tY8FiIk!3FQHf~IXa`;5E&%?G5SuaJ&!%8)zzlq^Of$7Mpqs=984*Y% zLF5#8yp$K_7d=C>^9bkpck683p|>&XlFpcb;kRDK2U&Va9X;cy4~C>YMAGOCMnf=` z=fHR*drqWDE-ieq1VD{-^5h=HIL~ZPbG5hWFjldHh0+B)(;niZk$@gUMOj*y_~pL$#%KY*iFzT5KP&ixcAMjcWyZyvNr=$n%*u{*uyPIL)gzg-1ju#XNq z#sGY(2Ngu-%&HaGHl6*lbmslFTNwfL&!FpKCS|=YRYv=O$dlwpO|p4_N(q~_+^w;AJ|In*d2zL|Vod2}jetR) z-Y>x=$8^eJxY$GVGaD$XoPJklIw_ONK6gRbJZr|rN#H!rojq&R@sJTbK)nRxg(9D# z{7zteUxiAP6GLr$sRC?xs&d5DmgdK$Qe@)NVz-|*Iw6Qzbr=rCwLi|9cU;b_;s>`e zr0e~(DUfJX(FgawspdLweU7o@&E;hsllvC}dlV$S%W6d1lT@;5y6ykmyby9m%$&yI9WV z(b(+lrdc%wUk#xDj6>t<29d1Z;*5YLpfaSJN43X?SLw@DzSM$CpZDJ7_|c*P1Bo5> zV^BaUe1=AVEsnBvAB9Tt_0hTL_WO@3UiiFp6ws$5{oa82nX{f0dhs)7?MIr64&65{ z10b-Usr*%~A|9?w33n8H!BlZxP_Va>)3ZUB&T#GtB0Z@(q$7N0^d(Zd;LGle*wk7z zRT6^^IFK)VNh?9w=*PK8s~U#LnjvbSJspo+0FC~8Gzol-H73}va`<+jS?Oc(3gwRm zE_dabOI5IAzao#tCM7wWVSs{irA<)cs>%6di^zTL0r)q{+80mDpLQ`(Y5$kvFM6Hc!^Xygh8&BDap?f*q9o&k+}aJn;{xSaE!Bvi$tP z7R(TXjx=FDmK?QTm1D22X{V4mlGwk|+RL+9ZMzX%`u^qSBHv(fuo6RfIxiH9-H2Ha zRCOR2*-%%9bczU^S0toN8crH)PLR2Qi%=klq^3#{ag+v|3WhME3`%AdUw>Q;o!XK5 z(PIjw-ZKESoD1b*_y|Y!Rj{a%7Psb;D}P9occpzcSo5?nlVi(gG_i*wK419O)(~pj zB%u%$flj6Z=_UD&GN3}p79+VDduOc6!>Cvu*-1a@jQ7Z@O`!dlj&1tv%cBtYWO|MD zy}1TUN-~HKXA#MMzMYd+5{QsOJd!lCGglGdlSKXk%~XB^<=Bx&EErY#WHRJ$)-k)Xu# 
zRrD*IihYGMT$X|W>$5*tNPP&ZF`S&-C&d}qe|$@mSs$<5(f&2K=X*MuFVsO z=R8VJp{=Tlz}#(5TI--H*x!(#lsl`^W`BO?Hf_Ls?2IE-wt>nH*#O;b>j5L5U`0sR zN}1>FYhy_TWypPpo{GLTY(Ip+B{SU($e=Cfxn}x5$Ym+7KTy307bD_dq%|3wSY3GX zYF%{+2~&ac+;MN^!TZj;95qx`*TH6uz5Yz{ghjt0uGU}BPgywsFo@gzcGdeg1_yFj zvz)h=pUg}6EZzi(b~6~J)~kI%>I1s`@v=U=b)70x`jah>!EUR{8(?BG4$THgTZX}x z?St*JuR@XkrPqOiVBci|39xqPwWa)jlwEf`)o=Splr*SFl-0DN ztRmy6q->QvBgu%YvN_sPl)XqxWR;usweV*SxUdHhm_kG>h zzTWS-0$^C6P^G?fryFeC(JW|?w%CrVHN5)5Dw&k^VtzdkmUuTPbnCoV2g5r6Xz<5V zX(Y$8l$<;HyMsK_{JQ4g*auA7rfng+DqZI3Zu4?#>R4AbKHCg8X06_6pZ{hb0b1d@p3A?(YS~O*vh; zA*Z`Wc&ET>E&%oIy@=iJS9ct8+}aTYr&bTjnl4-$%VG`(>A6=Id35Uw&zjT8u@aXBz-F$oDzlI@=n1%)v{Z$#qgNfg3+4Fy#O5HTR1&FIxj zi9U*ou3E8$8-y;t0i{P{$Jh7lA{KES|g$Z=}-kz)=_hpEdBy>uWL<3Mij^)Y@I2me( zDrRLu^*Q-ljP*T(?P!8YQp1MBM_~+_BlSaU3K1u@vU#Bcc=pl-5>VNGy)%)L4c=hK zDfSYFHi!7wSf27{5RnG>PHiQz)7yJl?B3LfX==Tq3_%MF-*V@E*16Fq+$)p}UCury zi*y=VCT;0p_Gd5hGh_?2$<8{0b(M-y%&e!L%&zY5e{>||W>-Ck-w+F48k++qG7eFV z8gze$jLsF>bfBOjZ7F;n5XlJ5L|K)T`2)KJ{Q^QQ)^9BH0mA!;CV9=LAf-%#%@wn^ z=utJ;T%`t|WmLkVwtFCs)(AXx+vKw}p)_KUQWT?w?vFP_pPd%QLe;ah$?gllki-uN zx6O!-P!hkuZ;n%q{0WYCJ8{qJQ1IgepyluDCCr5ix-t0zXoNRZ9k~G|#-~w1ln29( zv*ZuM7^gW@@jG6sqPDspkd;hB`RL7Gst?eC4?P4Vf$z7SI9b33E?*=}ntr>A&sYl{0BT|raiv%xR03Zn11G)8|A|$`aWJ6PMlw}CoVSHMjb__iA z@8e9b@g=)R{GmOz6g90Ip3#+~c;fPt#~ooop_x4Ha}#F>F^>u^vc)_aN-_8Cdg#V` zq&v2!2?6zww?jC5yUPrqY}#h5s(?8h2F`qH@4L)U;$E8dDF0N_fOsr3Ss-u}D1w_d zx1o`jVlkT(r9j$*!N;Eb-^HO3IEYJAQ?_@s86ImhZze-uwZHA<_`;*yLPSes!_w}% z6}0_lW0Jt7ohhL_i@+E?9}K#8-5V@MI5mwi91%vmXQ#e(97%=nZ1S7yOoUxT(krLT zygpCBqZv6k*#pBI+$7$tcsbqbXoE(cA>PlB94Ll1qqiUStGH7!Bh-^iGYAXq+e=ER z+o*8a^$jIaZ$QMOUvc0`s(}7yiA!9}cxiX*`nR144Nv+MDMAK&A-nq{TfdKdeio3e`?$mW8A@L+_m!eP(UfPajG#=bGV%UhG-zU<-`-UDW&%M z`OgwgWDEOXKxo(1IFVgEn|L_K13nvCz*5}yPI`{K48RZgHOaavs4L;Ompig!76~*f z7290mv}SWjeX3I=ve)am24FL~1P{xzIVoJW)?zAAde5Y%h<`Bq+Pf^J(O4 zZlZAs?*}|BayOv$DMpcP%SF@J95h2ZP9LG2x#U!kHG^?)Yu@mL$%nHiH!SR(&0=l; zIjh@%ENYn-2!wdC*-;}iai?N!b{Q!pY>IPy?R^%)#GCr`AvfP`MlnOJ(W&zc(AV>N 
zN~qqVPF|d!@ZBu8=-9=v(_@M=U;c2Tp;!Fr`oR_qEmol>+!LWgc;N^%*5{#?$0gxH2)i#1p$!)6z^hApI$*X{-@rqKOS?{+L}`_ zFW@uO#m}!2UH*D8W=Fz5P}-Is6VQ~Go}3aPB#PZS)3bd=ub)NK2y$dZV1v`*01A^P zEA#vPz1OlU#MYYv3@(Ud&#CpM0M+&p^kCyuf?Q4%-P(r%bLa;otPv_p6X3`M{e&)_ z>y2KMXtoE&RtKO3-5vjlW~plcLreJlKkp2qDD)B#Z{7au6JZDQm9xoW(;5vc$WS|Z z2tf8(q8xZbTgRX2TO2LP_0{2$WpNzWBzvqu%Er-9W z$j}n9uvs-(`s2&lJZ#@?oWq(-tlneIN&K}y7{*aJvnm}!$2a`7HiUOp1^(Bq*YG+h z0M24KNQBdTEeibz_-4O8xbofaKnqLPV@yEJ2Hw6UIbh>`gXw}E0mJ(Mgu!%f5 zASXpMPO|w**) z(nV`uenojpzv@7c<$?tl(SQ=gS!b?aA6)tFKWRPfy8s(8tZvYJ#C9n}TDu8l48hN} zfq&PFxa8zy<~cE>R^k1amcqqw(fbC zALFbjHMCVz;}_;Idd-L@@lH|{M-&ni0khCi3;?jf9M!yBt4n3JGv`Fu~&b^-u$5-Xz(3bFSf3d3s8`XJFBjU`_daPefeAPnN2|#mCP;pR`#sAMC)E+9nR7( zfmthLguy+TbbM45kbAw2hU@OO4J)|n%h2Xx0-NqvadsVESv@5AOl8=Mb7igz%Vtr| zgvjkT>XK-rF#6skO!@!{7En2pUwe;}vv9uUXS4>{zJ}($d~jaf=a<8izqk$)?|;1Q z(n)#f2G95`!+R_7`oF)U#-01~<9}fPGb^EW!B+q(xlaKz&3+<4_+|Vjyk5kE@M~?B zzC#Oig&Cyd-n;XM)~zOz@R^-B1j$2y9VUT_E3IS1`ocA4I}znF%L(jO^cl1ka-25< zRc)SIC9o(gB*R$d(T_GiXG0p*JMqGk%X8QYqHj{Sj5mX{N5dQe7 zvoHnp-Ndszd>Y5LUE?_3Y7L)x6%=24`?UXt4!zuCo5Hv@p`fNQh}>;@x!$|_rUy6Q zbfGad=v~bPEWh^*Vz{_Ar7mh|tPu-=#j>_5!D$Yr3N_%C^JP=9>J0{|$zwqC$^e?m&@Gz99(N%0_x~@& zjtxVo0Iy8RgW2c^ZGR7R(YpVkE0?!oMIm+`1ntM)a-(qFzhS}%ivAFRe{dTOz|)kJ zd`|+}ke6`wZ(^tad{@j1)a>QOGw{1mD3F|A=anogfVI-~(ow+fM}_?JrdwN(X8wOs zf8Yeq0SAgrv;Mi?)jZ>Wr~Xifr?n7eS}qR#f2cn)wouv)s}CLs-}u+TWxho?`!~(_ zuO$Px8SrjzOZHH#*bpxW3;q`c3sYaPM0G>jOtZ3k|NAB`ePZpl7vTIKGQ0W9+EP#v z7XE&S{!yMV9LNy~6IUT7JA#q?f8AB?Pu>rtn9fQ%SN|>>aLrW}zN)!axIRwV!X)#( z-8iAY*7%>Cv=y857uN^U1r%b0OXz*S-+66umR@U(>jSAnnA21EpYKZf5qYGpO_(Ep z>U^kT{qd_mJW>QLcrn{(d>MBW+eeAJ@ju-bx0Sd=7?{`wj{`BndRqeh&Ob;9{!N_b z0mG(S2{7VU76UV}rE0_fa??+5KyYm~M%w;YmYKCDTHPlJNqEniOs(PChqw? 
zY*-SHB_f|C^}X~36Ar3q%gkh&QA5R-W6U4Jf0_Ae z3(V#ER~pTf1+1rCSvJ(nS^RQ+d5g7+!@@6VxvrkCbDF5Mu?JC?@>9nOyIl!1-lP48 zDUt*Bmex~B^SZ|>vOljlPPpbS%xIKZ5R-={W)XvPy2+B>K-%6}Ndle+ql<-e>*CDM z(Dir6+_K?U+aRO}EP8|2@rmU?1(&fs`ScI)FCtbZ7 z5Uys4#_OMfX1(lW(n;&8~s6w2B+w$=+ z&fC{DPIi3RN5ej+!nLp5Uap2{{%O#{jy9sn{-NKVHu_6Nopq?bsv)Bd{EAGdfoy$?U- zB)q7-{c`EPPZJhGCB9JvPKU?zxz@RwaCr=#KYz%vel&|=>Tv_RMSkl(=E~N|JMIDa z4E!yxwR60mEkFNXwZ`%vnQtSP&|#xj(KOA!(J!qkRoJyvJxdV{C;5f>MVfU{%$3b@ zOBAhZlJ}3taSZ?q!@v2M;{T#GK$$M35B4K(`jtaf41HK8Qzv4gv`;L zDYt##mdQHr_|q+0d#iOX3NL|Th5P89=RN)`ot{$$=ON7Pg3dqfwcGFy0u;Y;~&^U%Qs%tkf68sq5NJ^R)ycP@Ixqc z?r$Erf4b{caGf~VI zR>n*vTB7ltYe#ui@&Dz=qEDa$>(|mOUlzL+ToJwe z?~s=BdQRZ57Mc9x4z51QavX)b3I9q}5#Y1>*`+7OUL~yf?HB8wa^?59>u6IN)y@>b zFR(ScaQ}n%ytJ^8boE6^SB~2OW`9HrZ#S>aO#Ire_513K&?V#9S6W|z=XHFIZh38W z{&|JBV%xTdY|*`2_qS*F@hE2KUtSiwA0dH~nv!X(WJa(xyKz&$o^R#5e-J0RSy)@| z1);Jj(pUea{#Q8I*57+A?q@_H6(xntFLfWrn7?QJN0;_-?elB9`bir8v3Z{P3BYw- z|4(VDwNEdv+R9a2CE#DfGU+T4aO=u?VtYuB?Ogk5_4j`WAkGWPLT8qE>kbUggDB;{ zr8w6w8Acs$dXi+*Zwrd4BVMM({uiA~jL!C}eVt2BO2&y9!aZQZJif zb(Ln_jemq<&KUnK$L#-Qh4b^^o(tv7Yl8dltT?|`4D$_krS#PCZR=qKDOK@WtGR$R zv+%bTTld~_hDeGV>0ajjdee`L7=~2|S(S-ZE%xj8`;jXAV0!HRnfyS4m8rS4W4fmG zetmxFyXE7XkE(x;De9v#03ZXsAMft}l|#i%a2uPoJncW4#&F2=bfUo?}pgzO-Bg^XC*YcB`1vhP3&g2G&$WC@xWE$pevs^ua>r?aCPY{4m4}b1E zli@73d=&jTa!A!`(m(B&dRX}g9OZ8i^aq_dV*aNj%LNgQll^`z zc#3uEa{U}1`W)jcg5}e-{`P-s98mu{2s5oC3v$9rl3z|1-uX`pS{=HfS$FWb1yRF8`Ke0A#jK)Cj;K^xLrBlB^w*e>$ye*(1y+ko|jGCD2t@|KlzbhHl-D%YXRm zD)oqzb7HnufT$CQwftVhS)%{z-2A0|iWfC<;QngYy8#pFPAydKf%MPzxE)nv8f8X!*a=m)ifT9D6PMgAY zDo(fl@#%lpMsNWzFvIHK%MEs{-Uxz>!vDbu#Z;p*>v;OZ?Yr{`g#J;1{Lb==9(LKh z0zfu69?gR%+t)dC7$QXDw6-e$Y;b6h;{Ws>YQW4?gv(uUzx~U9J_r~lWXZ$%Zv3u1 z7I)$QTd~wt8SMD}Tz*RwwJ;wgwEdsO4?9Z3^joXP>I-6`3I0nZKOCvvZCh91V7~E~Yc)IDp&yDNXB&4!HH*21pukmLxs=|3hRz3YGxqGMYcy zKgDMLe>4I*G0_0!rgZP_FN(lbq^tiT3ZxyZAh_`^UgC_4L@B>X!O}ASXHfvCL`22% ztc4U2)^6c#&gw&i&WhMXNba{`zTUb^h`W_9MO7VG8{Lj9CP$?f7du{$g|?t4-iw!U-ocqTj4s@)WZU&UAnU+o~^bsjIt)$&zR2vD7i5puuLKjK7_2) 
zYCrbGA~pp`7rp}LB)(2*8y>GBxM3g0N|{9=nagI6=$@zp?r+X`^Mr=G z5b-dp2Sfmy!Fo-Pk>6ak)M2xJHC$04*Z$k4B|yw2cogWaJ#|U024O5BFsP^j7>-Uy z_|Nfg+sOJGQ)AGC8Um>}C8{ex&u@MekBMj4#u zkKdS3 zNZG*;BUFeXd;o^rE3#3~tBQZcHn*i&I~euFS^eh&{SftZKg*8Wy6pS|kpU;*8_hlz zPGHWz!1fFQe#_jfMd-tTtrt?M`iVX*AL!I{Ve8cKhHt2XcnmJ4v5)klxqXAq({cN) z;d6zxclI&+#lslVt$e~welC4H6^s*0j4vyyV`X8>j{W{K#jyj{PgN+a2n&}SOOO3H z?R2m3DjCH0jL-#w#BVGB9?^?>NwEdutDz=hC!BaQx)%V+sF^c7+)ytA5GDK-IixbY zYG1qbjGt`S!tpjg#r%qlxBTiP;xd!%FCDY=-Kt_D)qp*&y0CdfRoUYU$p%OmM1V22Ww;NWJb5=mKANGUYV9M2 zfiQn`f2KE^Kkglgrs2M4X$E7Vh*uXe8&F<+vMymuq3rDPn@l66vScPOA3%vc{<2N) z_#A$yY5T);#^$^;@ry9JX6kvSpmTWrq` zh*4`?1uoS}@rgQ>LZYopEk3_3>+dgGq6SMln!D&m59w%e=V$xnZXi6*m#8@$uAE@l zey=i8WJzSftit4KWr)v`LM)NSl@-~+;O`eeE{S1AU5tt_lg!#{vKe58lj>tt zY6o^2Y+h=_tdCLRWIELR9x#_KzrG2i3jiA)&f0rV1`$b+wHzGy-MKC%3i|yQumYH3 zQsV6)Z2miH7bLJ}PjX;9#;AN_lU4FXJhwpnNL&Ll>4A4c_S%QjHxDfO7E-30I7z|dXlde6Cghz#N z3v6cIE>CWHBTXm7^7Cz8l>j%b1yK6IBTg_L9eKQ$w>`~OFB{{$;femYY@0|v%T}Vx-(wzj-5q@0SgSci zXe%&dwl1Yoo*ZS!%@~W9PSAk2pc1LFYLAJq_mT zJL~W;S7iK}5Cf1}4`71I>J}j1sll{38lC$Lyt%a{294>at>0t>dJmrZIbItFWIL0< zS7AN)9w5oDBG9Hj!!L<122yHe(qJRL!mFEt>{}?xH33S4v~EApO(Oo{$-=#+%DFDe zb!H^J>cCeXxnK;V@)fx-;~Kzs!w?zrC8`9#&_C&*kcV_1)s_@Q#R~+{J6bQNx4L$u zg0!g`H^De?%;jfB_@%j6WF}$FYZ07Nw=mnyWQjBMyU=jFJAbYIC1;<(OE4QDHKh;J zXtxIIftXH;(CqagUVY1iS(vD%bsro;QNp+qqBDmit7t$nZ=W6_p z>jq(z^9EQMWyd;#EGol{3Fx+MkyULx+y4~Bwt)X7I>>Gng*+TDW~-gVk<*jFwBYsK3 z4zphfH|^6-HLN!$BBiPt2Ou+L7@j=($qCW&{RC+gbrsN~#M=ODscN4?&Ay9ro4*@xz0?+{Igx3J2^vb$Xnk|;a`Eut@?$_!t z*B{KtPNGv7Z=jbAJP7rh1?!ENedwEvgjt4VbzLJe22y8L8{##jMb%ZUE?UQ(N zIH;@}fg|@49;YbdrZJY)&}WpCHaF3)510(nYWE6X;!lv&u~H~v4Khs)w!~)R1i7k& zE)TaLqNix1&dzBW1KR`}Fw0AP*A!v19k=btQoLRg=|0%Dx2tU16Yg(vC0L?O6PmB= ztN@yfe+X=;<+CXKhtp}^H8<8(b&w7EbHV|;6Vj>i#3nm6*i8s+ml^;9ofR1`<+~A; zxws+1FY=3J7XE1P21e85j(n-Gc;V$n7L)<4|58{D7GHCLde)0%r^i(vDHha}m}-tG zi3^h)0GxIvxmxhRacKG^UJnG;2&Sm5@dN^K0zIlp56+V0>usUEgbQ#9dn!13{?akE zNat5|fLrv6ssnl>GU_BpES?BCwwW_pkl6J-SEN0KIHd84!TODWofDVhTOX;YwfEF5 
z<3>S$j!)7tH8U!7T#BJY-&1B)tI`1bFaJZG_Iu@@F!L-mv`tlBF2O8f=4%RVNMRJC zCW-VlwwSpXMxi*o?@`mpe$^-PSbzUf%H%kypG(A|Yd+&r?6vx3**eKNqJ%@Mkp73| z^V7jToZ$4cl=38Hc$odhN`uarX9vVL_WBt4AM3xkLG?WHaStF4ouiT7 zQ|g=t53)Y&hj@lBPA-zFcPB@LP+hx(lBQInLA=(@0+I^`qB1;r?tp+Ri?vUh1#M{p zB-BY`I6D$SLk0fyQPpb1rqoqm9Jf8fz+QD=5PYQu0}%3EeG8H0_%Rp7?^08$Y**EU zv!wq>Yh(8j1Kp*GDxpS9ZDkODZSLZN-PovJyw_p=<(whz6^V}1HwTzZPP<;czt%?n zwqFI4Gv*+;Pmeop=*f}7xG)n(_BruV2-SKdfV<6KHqi7Ni0S(DzcJmLXh6uFm4`kngO>W{6gg ztS`-O=s2nmjC{s`Q=q`re`kdC5yxg#GZRoR97@feKt5f|N==y`MYT6Z{S%WY+nA02 zUR8tm+e6>@Q@qs2VT&%vD4X#)E!gYxN{Ga4h-da0Sp)#U;=0@EQUM~gc}=P6H4s<6 zVg0z}tD<+|3Kl$0mH0aF+K;l}80q59$8nY-TC{AF+5LgJ>fuxo5J*eU!2qr_Tf!K5 z=uEa+9i0MUoT3QCVLwPgA)cs=B*!McauyoN{zFA->6)H#1dReBKSGG*18U_@v`0=r-Q z#hm)RxkI;^nxbn?ZvJKW zp6z~Bo zY&tUta%6UY3ozn#5^wGsJCUQsd|nO3*n`x(Ol?n#3+_VYZ_Gp3&wCFqD4_45gf%%z`32CH9LNRFCn}_! z8-LOG;nA))dSeb6d1W&k9Tzcq9T92~C5N&!fYK@wcRbzbqmK`5Y)6C>S;*LPP9Jdw zT;bE4!8;Xi`ldJ0Ih^;HmUI#pIaqVz^R8_ZrlL60FC%2O&|J6&DVaPrC zyO_P(7ramro9CHsP+JJBJYBJc@y>zAns6M%5*iq`Ugn2VY{k7}SjKjF%sbGZ{lqnB z1XVP8SmRs_zMpq6OZz^N>OCziZ+YN}+b(^SL7q&fd?67-OOkT&hH|;l1i!lPRRJOt6YpQaX27co8A*cIqZj1*sH6uINeTO`(Q9>%>D=9`1OVxPS{qZ z@AuqkAy_R`TSE7`9&BG-+JRdYH-81HiliNtNA#^|vz7X}XrB23p~Hx>Q$-GdtGt-h(()0W zAATRK@chXlJQmgiQVm-+KQ~-{#8-~kcT?IB434&3|AIZ#mk*$PQNam_cQ*O^`v+=( zh!JGUiUt)&Mqz&F7QMnQ*-zgwt@i_tU2%z`t$P4WShgi61C;kCt0d_FdLHOL^LAgmv65G1rgATOklB5nJ`p zLMn!XaFpfZh*aa!DCnwubtbH3Noc9Vgc?a@F@!$0ZI2cP4rQyxe6EX08rupY6d?Y8oHlL=?7TbFluG6MfOwjggUuf^jIi5d|yH!R)!K)v!Ca{l?T$% zd%5M2raez-c}rY~H%8vN)as_t^18RTA^{SVO#r4{tHDt+4yI*pu;Fl^aa)X(2mecQ zg*$>SC_nHSC%QrF=N7gxz!%(^5RFVud*XJUbX_Y06Jc<;lWMaEL&5P2pQZYOP*{#c z+_MT3&-&0OprlN8Ui=ErJCr_rmHX>*Qc%+T1#dO zAVE*30Ad0#J9r$05ryxA-VtK*P4J9)bPnUyxk?}HU`$quA>c!R9M15^m;PJ`PXK6% z)M_B&?9Cz)GLbJZYy`SR ztJcmO%Hdr>mXOHI6b`7CJXP->1qiyifm3sSEsX9_mlOSlp2o3Py^=pr%Q-zOHwg&G zs{Va9ZX12t{;D3ZPWWTOSmeqjwYR2`?y^8qk zQ3-@p9ZaAU=NCoQc9HGmMBSzK3lMN<&VMN_0Aw89ByXMubqnbjt-M@c9hs018Ie;b z^4J=Ry8%)w1@Jo?5ki-yk{76~3d{_&hRIWF+b{1Tr8-7y&&XV7w9D=18xI`8Q5gal 
zukVQ2YqGj3S3D$vD@&&*>~3hX%Dlc@K+JOCkVACkK?z#|OGw)iZIJ>pkr_Acnw?5`QQfx_a^tJ zLh`JYSIGARzz}cZh_CNXAq!RvE>)t?NUJcQu^`!~SE}UJuw{o}_u_lQH{1BWFdm6_ zbtOCE*Qd2O>ziN;e&0#`dI1MD4L0&wmRRu*)5W12L&Zlze~fE1P<@IZkS8JT6HH@a z=DZbjBSD}7Jay|d#Mm~zZQk1D!5zRB{b+G0K9O3s?B>lv8vTPB^>z=5W?+!7FFeLc zHR&|l#*>@b9H(!b!Cq%`{kS+aMsEGOAWKxM6i8bltcJd|s8}JpFR2>G`7O!o4vN}& zqSl>~4tAV)q7cM^{0(PFI@$3~KLE$seepDtZx!|6>G+z|%#rq@tVmzOFc+fGz?&uB3# zRI?>o8p&qX!GaBXcYBC(x-Cl%pzdILenw?;X`Fyio4lwc!fuD8X#G*iZ#9&HzUd`n z>Si8uGhiWR`V>6K&!w6NCY&4fP+@z1NSKvZZP%O8nb`?7k)tdwg7#tGewxLMh5>W1exEh(bdKkaM1cK#!gGAx zNN{*`O2y0>)TVzyZA|Bcj^=+C%Z_(aX-Z6&jI5(1#)b>fLqFr;2Gg zi!LFK2WIE-maoW><7kh-v>n)+(;@5Ijon0_xMBOre3#g0-}nmSUfb+zX@VbZHAF8v z&P9&Q&WNd&w#?mlxyPlu4sw>?R8GmNDYI3w`0=RSadhbhn4HD8ib{Yh)iAEC(N0VGE37_}Y>?$pMx@*IRpZ56?^>aE_(_iOo zenf}bUK*n}`>^Bn1({>5u$T%!X6mBoTPx!A)>{d${i4y_D0BsQbU$;x2L&eoK|X+3 zn5sTgWji;=?Bg{7a6YmEeu`U`mb zgO^eG5U40UHI`+bJLFz_++74hydnDwu~4?8m%Lma*`Qv3d5C4S3Y`7CD*GDyNzF#5 z3J8?>LxBU@J}w|?F3);IXOSw7{e|`?1b{i8yK#IWydf@RBThIjmV@&YO}}KI>Wo~> z<335RD>*(q8~JA)5`1T}ZN{XgM6EjX@N8`L^EGqcivD%AC&s(qmq^-tRVWI@+R!|7 zm&kF9|DhPs6*l;&>)D_uk-62N&xuxpKFK{ddK`>_COmRVyxc4&z1J#g{8-=A#Ue;} zYVe3Yv3-X)ShFp&UXAtSTpl`dZ+0wN`52Gsgm7GgKH)-$3j6y=2C+YwRbOw0fSnGT z`6KP(C7i1|8RF@M@vbAjYzv+#vtA%AlCFz23lhwJv6u69q(UNjcJGgpW-$InpUG#( z3%(sqZ%$6Wo2SvIEBMoUJ7;l8fvb6x*XVN@*l50VNow}3YGZKpE@=UA# zMpTdi#hHoPD#ow#lRORo>}?xQ;5pcm)dbFDyH_h<$;Uz&=Bs7wBvcS}XUpCXFx=65 zQC|!uRUc4U-a>YrHMdt?CNsX?_F~hddA!pHd%&F?KxvUD7EoYKg51tYtS73X=)T~q zFT{!7k+R9D3*>NLDA1bdd3@HJZ6DN|)Q;dKDl#i2n!v6H&oO$UqRCRWL*!Ui{R}vj zO}q{2y3hn-mIj#D?~eu=H3FttFjrD?V}FumyuZ-l+08yD$E(i7)FBi#0eS_Fw+FB1 zB8*!OC9=@)jOIK*thEp3F?inlIJ`gh&C{W$%*-E5U$_g;ezN48n5{EHfS8d}Tgs1= z>rY5)g$)4TEs!SaM)D7(KT&pcPz*bcV00_`RkQ@^gM#o$uyB{Hu_zSNxy z0DNTdwQvHEu$56gVCpMilhuyXR#^jSAkX6o{Nhx=lx=yx6=0dF)y{cprq5OR z_<%XPkh1wedRc0gR@_sO4J~$UoC=|D`YXl1 zPH!f-W%RL>I&MPXb9VKm!ow*hxM7IlN5s4wBR(2lbc*K3oy{7d?B6mA@{`qX|wcp>|uma9#(ZCu~bQ(KYG zIaS&9b19mkGt+jFMQWmHTvW;tQbpy7OU|T(wDW>uptCUX`5W`#pL6}>@(dX4RCk)0 
zc+Pp9{^y}N@PoIi3cd~!E~`GF{(R?{Wtd+1#CP$K=t{LzFv$ZAdZ%!rfsZN zj>3#vW&=J=NjxR1=&z6QZIWv@m?d!<@2QR?cW!$`NIq^`{h+tkl{z`j(t&ZUIf7K$ z*;V2qrhte{r$oPdV(#U_h%7Lqxd9WSnw_fXxrTA~QZo9Bi6sCmNzKf7j*xyfIdEZz zYQZtG!dgRKlL1!g0%y+bVV7Qp=v0XmBJpwZX8(m3T8nQJoMM4;#r1Xe!a{=6+lENp z)F?reMK;oITtb7+eYK6`YEP+il5}@+mDQ6o4&!fn8Mt}x17%IY_}mlYmXa8u)EA5- zo%McHls!HtGy)W<<+UHhDpW#KK@e3|O{ec;sf}Cr;O7ryb>S99G-A9D`c!u-(Prfm zM@u_MXNUAEPsB*IMPiCR)Yo%+-_G)xo=a+)_>f}BcGp1WNSCA#FpD@osPlb!F@fs+ z3~NlR4q|xewfd1RViI~OGUklMS&I>9dPzbpR5lJ7o%Q`vgmSYxgIsVt-MmvRqmhyT z8A)`F3KwCiY;SpUfHqRO-N(;Z1WUyOgiCk>HPePBREac!(4SC;Zpj%i9w&% zi^Z*${o++AP<{3nmaOgn0qFp)pENY%+}tF}z4YPzkBO|P@ZS(n-?6m8)Go=)d5MPk zI@enPC&Q<}RC zBvqm06|hxfBHiiJyvm&Ci0$m2h{Fb@yz`s5K9Saz@$2ums*`A!tGMh>=N{!s^KsxS_bcOn=U3C$uW%wg(CBD` z`XC+uw|$R!Zckn*v#$8bWN<}cQh3)Xj-9W>F(c|irwb)9L_0m4iSM7pj`I-A?`a0* zOB?o%`T(#1shqKSe65YJ86vs8(}cz3@nJKgH6wcZn(8s{GiFRv%VoY8d2y3Z+Pp0! zFP(mHN+fOOogt7sY!f_d6uvOu69TBXw8f31f>M=obCrWR&!v?1>i3;uas1u|rJca;}2-b~Yn=qT%{0 zZ@itwj_wU+k5LqI^K>RY$Vj;PJiNi4OyVx+>~z+Cv6WC*isg9-^}xd}>t2e{L3(Rz(c&#F z$yQp@p9N@f{o!y;L44`lH6clDAc*0SDVI?P3896;}&=s%2WXz(L;eRdQwcPHz z@sKewAGc4GyMK27te`_^>!?^rzXv_JE+l{~-|y#f?VkApy;$@$m7)~motw!%iO-~1 zmH8bf9HIY-9S$pn7K*(TocQYK>-2Ynt#z7#gmZ(gUbVfMK;gYJ+xZX1LU5A7aHG(` zMpHHB=a+YqRPEc|$qV9l?C$1FK9{}W4Ds=a8Vh?0f&H0hs^mExGzCx7owXBWD3{#z zz+#|poc9<<7F|DN4g`&X9F6db(z9-(UV)qT;N6982b?>O67jz9M?|#q7I4fqJvGZQ zx^*P}c*?!05`5P4x%IHyTKoNDj$AxF zpRuxu6)NWE`!LZD1dadM4b7Uzo0?jiDFCERRz3JXkg z8JDR*n|{vh9cfDf1|Xl1F3uZ%t$be!UaAOi=ZRR@1x5 zFtpXyn*EOG8!Noc&NRcE{H`exhALh)#UKVIvLbFRyGjBr#kCgN$ z%yWn_e;kq`Rq4AX;dgyB1Z6B|4uiHjal){!oZGpxol9&fA%|=}VA9y#u z&?}eldf1_^>&E^p!ldR%h;^7rKrC??{aJ=ju8-gQY20_*JLIc!;4r6WZmq#7hnzmk zxzQ3`_V#ayUmUK5c+0+{nMzXV7asHUTJXjkCvxp}HRJMLRw>$_cH(2ibDP`yTdq01 zVsbUybe{fl9FRm+dy(6h(`9wWUuGpN*b+asBl{qUpjdx(S%WU55cJ1M^s1B3H$*ce z*wd6M4ZO8*r_Kdx3$86%A%!z42-6tL$?nOgsA>>VMJ8)HZ!Oz+!_p3zEUxnyNsd*M z+ZuJcHObxpbC{t8NqUUA|RaY#FyR~+4#)tP$xMnGO`%c;_HjbrnVmMP8&t!2@HXV+UZiHwv-CaS)*{pnHdgG=)ExfE 
zs4L4J20_kOBJyPaYcKkqyo^lEeI~__zoGABil^R3y-c59%GCoSL}Z#haxUkfLtFpm zN2;C0{3^_~44-G??k5r?7!?bi(F<(l%)Wb7;*DY|&GEeqlzLy!#J=--pkh{j_mD^< z>nSVMXXb%67qS3^nVKam;QoT^5uh1#`34@m^CvR1X6RI)fm*>`XF1RA)ARJ*Gz1~k z{#`$SR)G}SbgsS9OHj2AJmJ^p<=X=uT(`}j8+XM+!6EM?6}F=eUi$lOT%sXndE@sf zO_AyK+pBwN+%Hz&XzElR7Ce{`XI20NC1>&N@2vB_@>ZU*_7NA6X=>vqeOGB|oi%AQ z`Q$oprDI$#<=AN5mV{2M6=OVhqOYlM`dlk@imq{`;oizaM!~xVUWs!z_{H*tzqi&b zF5~PSJ~!pLeVc0|rl;JEN33S-NBa3p%Ygklc*_u(!P~R8^~GtO-u1n6W!biSIIw~s zU8A4&a4ent2vR>~q6yOBdsUY&Nh@%woEeD^&CpK>hJeH}= zH<@IZ%J{2b@wlL+%wR*)xmUGr=~-WdwRVn?(38)!-Fu*IW?#)~(>5P#;~e6M;p+|e z+(;i!mHM^9Qc7|(&eS0$SgbCk{LIm_7J_&2Y|R0WExmZ=f7~r;*rF~ApoFv zEw3rP>M5;QUR#_TeJ_(~SrdZ)ahmkB2ZkP-H{Zz_5jCVsn<;2Fzp(|P_|dRHGJV^R z>>Ge*{oz@06|*x;TgmAKl?<%Z5})( z3r7REXsjKdZSNl!X6W%g`sOn?$t(v$oOh}Urx1f37q_?ooo4rypzWr&L}sM!=x-@+ zWb+J73xPR?3>E9cU!IQbCZr3?wk$}!ThW^m*QlRelKhdkH|#M-iK`*2@mD?H@-kVQ z;M|SZ6vNB04>zfl?DkxEd#XYN+AKZ>9#yf}H*KAMsE~~If|hRMakUFE!uY5QWo`wT zWiCZu!-h0YMhd<>svDYOQ)&8WZ%;&RLk!l$RNXOZh`Zffo@H?+azRD<#99-o+yVMF_yyh9uDd`FJNUEN#bqY5q%?`*Dl9O^oq{p<*zVPtlJ z2X*Cf>yt)G8Y5r4(pt7NNvImX8)~)8C?7;9?WQHf-#TcI=g;JC@}1|%g~}w+yRdU} z{V17po=VkcQ__vEt%V>Zrhb5E&(%+`N%&T&o3SY4>0N@IhotLA<$R=+uGvA802y#~ zY4DbU`*yiyu&z*yPwVPxF5n)u=AP%3%5COX$}AGB^XiQvhb12isQq9{lJlNn9wm30 zK9iS{H~o=w_8#xvM7+zE`@m6rytWLZzJZ&q2AdB~WS%99iUZ{VK_}gol6&rTV?V6g zXy5Zj)mNI?)XTgPr76EO0IXUa9;PQ~rAW z*KId5hrA(2elrM(?S0G?-(7|cTaKp4sj)sxj0m}Mm)BS0mJ0rHh^;~Gh<$zFcDAfM z2oAkUP74)1*{j4VD{wq4)KWQS@{REUMFC(jR8u&6<`xBYz3oViNA-5S*}>a+O)s7c zv)xaq6KOJP-M#;FxYb#g0p>YC=0CrCSBbSlhveY46V1nFbr_Nu$Q$>(QPi-i&!4da z`k3*ga_Vlup7R^IS_p4FztVeyKEDm}tZF6~#OqsRZ7=`0s$#`#lER9 zG|C`{_D{?_74;}&swcm?zYkOyQzsZ|y|5Sf-!0734>i?FaZ6viHN59R`rAl19)Fqw zBhSMUdT2Ck!?#BvpjMpqxgFaN&z;$!J8x1cc^mp!QnE!NN+s+{+D|wNNOR9UylK7h zS<15$=MLAV=+2?B3jN(?YO^Zm^Ug|dWNelUaK2!kcd3apZ{9pF?CrOSEyi2~SoOxz zJ%=;st!8P@XKYC2X|s81bY9WHUnoo-YwjcTj)HZJiLI$O;!=YjS0#U~Rauj%_sP^B z$}$cm=0=?8Q`-)0V*%cPwr-WYUl%NJm}%rcHfC 
zXVTMXLa)D7skGQ7O-y_eI>0ss-lNX0TRH_efBzRB0o(nLF|0>~m&lovLFIvd2*f8bq@rai@ML!+RU~K+vM8P)m zhw7Xl@i~H`OwG*Dh4Cnd=Yml6bC^`i@uVERl^m;>e#UO+r-RzK)knPZoZ$2!VePg~ zk&v)bJ{{<#Dl4zq?tZoRT@C%yWWftjW8;0o^lxf)-rY(;gv8MSH$IHfxI6?$?8rXI z+Y*w0@6|twZ0q?g@_vIUs@EU$b-{B+vMaE9=WpFCO=G}ikrlcrJm!J5`N&6Va>%aA zOn)bL@_QPeRr$uVoUe(Uy9KcX=3l-EGPG67E(Ya@B~^;f2wxhmk+o4QVJE(@Vf+z9 zb1XEJUC`JrJQv(}L1YXwZw>YElpJf~GUU0Y24M}IW@&a&%|fZGEPB}G%O=wh#Os@{ z%itj5@2?_wpPV6NYqSaZ2i`#P%8KP(*ei5s?_&JXovoN0d6PGI+xu;>oC4$Pj_ z)Vw*}VPnu=$9rGDXoQzsbDZv4COA*Kh`PIA=#kz!lp?BF*K@1>xzG;BmrYL|cGu`? z>NjzwR^TB3nZo6#PZoa8an46<1mAHgLPEhH-kHMjpXqs8td6Uj1spDe3 z_F~*uGpacQ(lrG%iOWv5yG=ZwcvRUaT9pdQP+zR`{>VfhqM+HU#nuL+bum;emiylA zckI!l(Q(}yx$l#!k^Ua7$O^7f_5t>=8);Aw9d_={O{4PAowf*_Q{TgDcv;5S$R%$o zXGo8>O>6_F5Z8gdTct$4blfYJ7pMC_&fYqps&)AvmJsP~knWHYr36$`Ktu$jy96Y~ z4FZC6i?kr6AP6WeB_Q1)5)w)&t(#Q9_|3A8=bn4-_rC96M?E{%Uh8@0nVC<_NLJAmzXHzo{R6oA}h`HS zVmo*IbNe$`<@3{LR!z7Ab4yb8Gk<(?1%GkH#ZUd$8z|5=en8_7 z0+>xM>ZtUInZBbNmZL9k!WrNtf2qkGD@n8&vs(rRk~TwSZ(;+j6)993_~cILDfNp!1qn%4zi8@6Wp-{;pt&7a3anD2@UA*3ea!3kjj-AxB8!d1k{6IrmGt4CW=xuF#_4Cj* z+*Q(HS3w!T4Dg>W!)N=>R#cLYN>O|5L8c}2R?#|SG5T9-xE#^v4!N%z%eo(na8|s< z2eEPHWZknS9rps`{pV_)dre8{IP}E*Oi0xInUBLfOxw#=afn1?)i_G>|9jp2>U{#aM?9k}Rv|2G6)hq<1`zs*$*nGgO91V)E3{}s*z=!?)!S|>+(yt4)F>(Pp})ANI{m> zYCXgo|Rg1#09+P4*Jip z?4#!5VqG749Okmg`hZqHT}LM%SvN(Z{H3lrQ-nVn?Gg_**i%lb$KFYqIC4Z&tdi#yVGaTl8KT z*?FWZc`uA1?4b92MUSX=EG(+m(s@-X-xC;)3-L`La5WKYpo){Btby*ynO8y*47@jB z=%*ux;k1lQ>kNkObkJc6w52WN!u%=w@m4-VU3&SMYgFHQ#!M3QWRz(n{T;E zh#j~DXj8Y%Zud6ROod^`n-`CtC%8VjQ|1Wg1&R9GxFUD%o^wg7Rlm+QB=fq<+4O-$ zfxnAp!amK~L|}7@b?-Fkf&C4ygU{3jmt@HbXR^73jCCbm-;a@em*RgLUrSkEOEIZ0 z0DnH%gU+0AIQP8x$R(|7d#8FXr4OACkn4)Q^+}`c!#?J#N2>Y!Om(^@UTyAxnje6OZvl1 zIaO!sp=Um9t^4F+0X9=g%#{tw%K9Qz>dtNX%6eOlVe07+wbA2D&^os>*5LkLcnh** z;UVGftpB0r8t+Elo_aI2$z}pItted4xr&>6J*IPgXBLA=-1=$m_w_dEC@hvm zExCN|SSu?rd=dj?%TAZhJ35O|b(ZCg!q^QdX&^GBYKR`qk(4>dcIT{r%khVRBc3@sFdFZ9#5---c29+FR@OIMBzDUxYn3W_K$6Ls 
z#yFX(B~e53RIP^0*avS#P6vVFdXo{Jymc)sugq7w?fUWquvEs}Lto;k{wTMuP^b{d zUU_(ox`!ssXD{*w8UO~#m)`k0y%rmnWF+QhN#4>7BxY{Hk|vO8?bRGY5t0;=zDq3i zpWK2PtVlhHXC0rvh_rZeBQ*4s;u>3q#ROhMij`Cvo7YP2Y^0xptqcc*V|Nbq~;**yJl894TOZo**Pzs9=$eouVyd(m!yoOADp1as90{5*K*d-rnR*R zP7AdBb(rTJwB7PPd?@$pmRl&VHYr}7*rp9nhE_FilOP1roq$${PRkq6cC2_lBf`FMZ{MuM z#Mpo=Gb5rbLqo>qdDsO^Oa z>JVz8#{&sjx3}{bg4ASI=k)Qln^d7EXU&>_rJ+Tj>Uuua_0}ug7uGC!_vP|h zwszk@tFk^nq=l}Ip`yHs7kz8+yaKE(zP z|Ho5aSyP=@SlFySJK?B^Q0J-6uPb_u_%f(T5E8g6GF2X8E92dt31OC5<|Qxb(@Rbm z(9`p&>7t1VqA@lL=-}C8cYJhdv$3I%qRc8$9H@jJ36#)hyuPrxVcKm0%-|E{!bA0j-g?kdVh$fiI*8U~*A)%+C7Lhh zIml<}=P|RMSyNz_hy~?JCK|ekif77mz9h`MBdPZhCKSuy2l@hj8FWeeSho5@t(Q2~ znJ`yt77!9VeT%A!qb3j9gvL-VX)yzni!xKdO2X`jmD;r65)xzsn6ggjEwtZs1lj*a;KOzmRHOZ3Ta%ctSZ6lc&(RYvf#Gd2;=2__*_JeEAR zqRu;%1BqQ7N8}aPNRo~tH1&cky3SjKNd-)i6B^z( z`RDJ6IYd2J%Na<`HqSMv(%-*OJ&3EM6{?E4%<4RSnq|}%d#;-cf(7=NOB_cVu$-=wt6>g z^oovbLL1>wcU4{D$JE0UPULvotXZhXB*fQ$;;zp$%_4a7!W6i0jY+K&_0a+!<1;^V zkO!mD(V?DwUvZxof27cqAu|C=n98A+6B*r1K+JDymVMl{XR(HWqCWPC*AqT3#&MIQ7Amc`}jAb4y5E#ZB&cGrgVtp;; zJ%`27U}n2Q#pJKe)#2F1nXGexfTfalMXFmh6^4qfR zf$nF&(~eZh7Y6Ed(qpcTD*FbJjJ`!wlPV+SvS;Q#(lu5Z=wQga+K|cD*)?!lgvUb& zE7<$MCTZh>Bt*8nkoGMMrA-68iK; zaCN;fo|n#AAL)p&RLXp4(l;R+$mb#he(CdcE*hIR9WE%NwKCQ*&4j1x<*kxK34R5^ZG!BdXkl^{WR z-XT=GO0yn{OL=Dx$1Uy6PUS>`-brjFuP?`iWCnEa&*Dy!-pJb$TgB?AjPuNODkWi; z?Xk zJUY4TU?$zoW3dI;nCbRu@3@MtD~}b`uKc(DEGzw!tvn6ndnOiB;~7b zEoDHy;=aP!6D7pH-*S2sHZ%hCT7e^4`)i@Q5^R zj&4ux$bNND#2SDOUz74huc}Wxgcx|Y!s?GJw%q(!_vWyT2G8Aw_~q{Hd)~veq9f*U zwT&P~P`p!lst-v<@p$%b7de_E{q*BKXLV&|tNZYst!Yi(E<(H2DBcIvNk|oquks=! 
z)tcq#=%X*9!`Yk9&5e$+JxA5OmU{BxD>Pm6lPl>>ZPXvztPJAJQ_fS3-SLQvUK3kw zMzEAVDM+oZ%sNe}5ZECCN;@0c_O-IMO$^uE%~wbAty}LU1TX3<)b#E?_d9v4cj986 ziBGRTx4O+VZ~PM~`^MeMC*oEiuJNx#mk9K6Xw^QI+`XpjQ!Ly+EIO|%XjmMv8nUWW zIB~K4{K+ZXq=F~*p(h}}I(zVG()(x9K!=%^_y;8Z^N(2{@hGpd-bzubUoi4|n;2j} zT84jRv8R9f`kPbfsJZYj5rXV5Kdn38WY{#>-W8jg^Y>J@mVoS`qB4v7%>> zvZJEw9n6$oBpvy(DEF;kqSc+NywwQ!pd%%s3~xTaJ6YI;T)9;Y(Fk@yG+e*f%!m^p zIfTGK@ZhVu;h70YNps?9f2BIJ2|g{`*=BAoD{5ASR*I8q-x^_pJ!VFfDM+aOxz7;J zIK7A5g%Fu0Zyg-BU~8Tfd+x1R0H-s~N%v6LC!zkyHSe-6E`G;3fUS7atL&ORQP{JbQm@PX_J=b=tdsP)p7J?A z+tEdVcT}NJ??bRwkZ+vH-9zTrr#&%gg)cD@>hf=8r{&cKeiXBjb$oB=I7^MGLmkOk18C8v(MNyLy-|S^3w@dLV~b$bINK$@SD1uBOxu-^!=K~} za8I=lWCn1f1oBcP^XSjn#Wz3CGc$A(R8GhU-H!o4tLuEmMc+_;d5#Z`Rabj z-cv1L#K45e^FoS|9A>2c35f5m9mGd}Ze(Pasd)QvDTekjm3903tLUtb%ddEz89F}` zF9I{G*;UhT-j`uVV0GZ$u%>;`(_h=KF1xp|Y*llRzRDOng-xVD_;Tk;*5s+%A5)1K zxylJ$G2;n0b|-y)>^In7!qvE!4S)abCp2+eo`!53Kxz6%M6VUDLN({-MPuk#>Stbw znSvu)0}>q3bhmEaS%^D$U}!*ql92)fuV!_zz66K7wFFC4a`TBfx+p(*97Xg7dGOjb zAG&PZLx@h72gj92yMnbM9CPLKEa!1dl2!uLvB-E3<$m6<$reA;!tntJ>by}*r=-$B zjDbmFqD8b#(qOhpj08-o0DQT zaUDuxDt(yqt zr~WQCHgTQ3>uY4mE_g5D%YB|D!I~$tkO)%8^=Uu{3vooQJngG)PPm1uAo&C#*6{Q} zVo%xt$co~vX^oT9$MN|O9!7N*=R4KQQ){Tz!k!G(so*%}tB4`~bwfWcYL{6#KjsdYe3l8O1A4U7*xDs=|; zu@8E(%}ag(;+SZzb2qb1qBhjU4B5RhdFq=GM@iOYRgbL4dRWu%18N7>+~ud@PT$D#rhJkwt=0SW2>uQ~m>iSpze9t0~Ny>5X_or=!~Eva z0%-pD*n&>dGM4cgaPsT1B2q7kjW+w$3lIxKnct5-fxpqSE9AEkIcxZ4U)NkZOu=Hb zh^(JA%Wzfui3m#LjCv=fn|fPF%CVkLpf4`O#8%+JmDM%%-9bam^?HziDb5S8Yl=BKLH_p@`6@+OBmgqRH$^? 
z7OwWKgM-b9LFf6y7*o((p+juq{CRAX^lk8P8PQD2Z$a-lunFq%d*L~+1-r%ZY80w#e- zD_ZUz+{JuT_WI7T{=*{2zOWt(+=#UAA-g!c7bV+OX%l;D7Z=Kmo9RVp1Tpmy7yObB z0qwrdSd&znhmME$%A%+x<0AGLR}9VE_4-IEC&pVhAMY5&=VRS^m*drw$3M*F$i6mT z+vAzKY(jQ-m`lMNH>3xhRkYBIO5Wa0idx-1@sTM#cG<1wjcVw~gfd~>;28qNk6uw5 zl`rz+;=CQ!SS(=~l}a$%8k8Tu%XmQlPAEWVv0x0$uK2d}Q7<7Oz)vukcZ zrPmv7XTa3~JecW2l7V*UMY@aO8y6GtAJs)3%DS3EkJlM{vRpznHMs`Y1;rbMX6cV5 zB`NI%K{F*DCE3IpG|>Z+H^UONyXtXO;}0sUx-^;3doSD<`nET4 zlNiZLn(9b1+DKDOT-&H-L5{G@jc${*pc?H8E}eN0e;yxtVOH{oLT)F2wr&+DJ=J<= z!}GqwN+62pLkw^Lzph-ddft0<|yStM;SVdQoZ1Y@`>9YgZm&1Q4;gwcbMw zsXr`ys`*u-59lxRWp%!N6@^9&cXkB${oXh|on~VdWzr-&&E-TjohLMtTV}Ukw%o9p z_yjvBPI1fiRPLJ@x(x^-0(Xl|Yq!TIgihZ(Bx{o&|6qN{W!|WlFYEww|AMZO3GKX{ ziO~>R`hE5NyB;i$^`-~kpMPVk8&Jm@a0ecABDRei-Zm44A;-`3;g1AR{sToxFp_Od zDEsriq^HU>hSQFf_trM~|BHi+)LRQR^)cc~Z8uL^97Q zz39HeG6@2;M`h*+DByDw9wz(Y&=aPB{RX>`4v~$s#3bw;y}lj~2d@l+N8zF;W{5sU z<;$MNJ$DDAuOUlwqI=>Pzf@=0P)rG&sN8qyD~?u`JF=ahSM=4Lk`HAiZRR(~_I^M< zw1_69IikmxI2dMzg+Zj|$nES4RrBF`&h`ylR?uZXiiRqm2{%=S#D} z(Wdd(etRb%b3+2(xs{+2Y`qI%0%7n%rOmJpB*sIdIw!VFR^R3Y4$t9^9 z&mstzmOQWT%Zgon3XCC6==2GQ3?n={FyYkr-VJJ+;l5CC3FjueW5Dl9{kFUzo}Cfq zGxbBxr-3=7d{)B;pyjg{WljQ06bi<1C#(up@{ls&%N-4oHp{URu zs@-_dqdsoNVzP*U5IF8ng%~n$ma*3PFoX*1Cn5D!AInbLC9_1VL~C>=AnS0SpP-7P zd|7p$P{zEf<*HyBI53(QFP(A4y`Q>SySAMl95trW`el*MHl1uG+Wi-9+1cf*_ zQhXGd>RQz?ayeO17ymKUwXV+G=_n2jRF#3NDE&Y3+5lqmnddW*6f&n=2`mUY1}$?J z$k&fFNHu`LP0q4id(2UWtAHZ7ywp;@R$@cV8St3JjPrNN4{Q7ar*IcXe30mHv98$p zDOxcb^+uWus4O%)H$}xnyoSGR<&8|*-H(=9p%b{;DId0I15NwtPj=CIw*gWTn7F#b z-0Fs8uZ1=a8U?xoC*Rq6EY z@QT|wBr%8~iCwapC1iIa-g&_fN5ZHbg?&Bh+MltFSa!D{`3Ju}Wk?fkoMFZAoE&2# z@5A)6Hr=$79hJq#%@zvgMyq94R8e*NXI_g77cyZTt%6V!48;_$&$60ncQO7V(qy>J z@Fpe~~*FdyS_p-i2!hk?)lgq1fN z2lXD$q`aO*czK+yllID_dx{XA$bs;bl(PJt@RX&DH!gsj->gp9U|KFhH;~z2 z`YUL(xLEEy(%F@7a}_jzGi>tWXXrsupkF3hr@Kk}LOkz~4;kWq0=&Kpnl8o#mzwdo z(+7th^+Va}B;@>Y`nq$^sS74;ub4IT?$5n$F zS+FN=E~%f}XEgfa1179{HU+s!-IUGNBaxOqz)J{zn`8)qAKwrf7cKBDTH`he1^vwZ 
zRQrzUgjy7qdi8H2#1|RP@QoIi=#AnqM_%;1xNz4N>q!U-Gf>UsuIMZX*u4>lx*n@# z)C7Bs1Y3mxwg(jBV^m5zbvRY|Zz~@MC`QPqO!hZKC^vvnJImcYc1@#{$K0>`!Hl}d z!==EtJ%x?V?f}q8Z4P^F`|7KR&|70V)0x6yJW1=)gPlY)cDNELcJwTQ5jsROX;K zC90pXY@{H##n_68@^g4_FN`%G>WDW+Piw(2@j5={lupb zs<7+%HY^%Vt-22F4{P4sANJQPx}$$Yz$^D69(XIAt%gX1+3}c)>Xk*LVDkVlyKBGH zQCU}ZE5QrYHT4IW<~S8x73)ul1Rh#^4(=yL1CGUzjaM z0WFfY@(NT>diB^T&!SCx78+lB&5A5s3n&Ca4&Hh|!Nynv$4v{;>xo&Z^=-{Z#DQun z-PK_a1@?90LWluzFnPw8c+ReJFhVr>vPC#LkohwUeFv$L4jbO+olS3cO78N(<1$tr%Yi|CpD+FXtv3zr!K}QXn3qfTmd4` z%-swrX?WUI1%?yT;Rn4|b<;`mT8R4T4qs+J z%1xah4mgsb2;2IxQLGZXatt9-W$14BHiLZxHR;Qly(dK~n3@44en=bdBXm9dKi!Yr z(7aBHHK@BXu+eAv*?v=&e|+W|juyt3g8b`jCYL^nT(S%mB^4i4dExOAx-3T}Ji}fi ze%~*&aolC!H-7sX@U^cZ*!mRu_{1TZ}IudcbjXU(5uJbeh4qB-(dI(L}Y zhKFR2KsjaK^~0*CfAs=J?|8gSEZvS%X2Mgy7a3Yu7DDHI{?yLPrj-7TwzSBlGnqAF<2sVu22`pa_kCL+vI z+^MoJ7HgR$v750hf>_{1YktOqUfR@5Mnip1XmbSCm}mKK8C9(zO{(V;eOK zwH+%VI|&7VS6a#srFQ00&{BxEi$IL*{*p_-%Q_t1~HEb9cXnjnOF_-|1FqcZOoa9MU(|W0Bt4uYsCF zvs9RD?Vf*41qzYKv z_dG6gR3G#?`%&DB4LUi|$lyeBhOOUrf2r_E1ARWhBJTX*otsb`w7_5qQXrI)lIBIx zisaa6p}c&ctH*wtsslrdqa(#{VDEyMdvqscd*XB58}CxwUJp7i)oq*a(+;z-0d0<9 zT^`jU8J(3Kvdr@c=x+UF``mkEO2)u5h|^QV)d~^`@R~icA(!zIHF+LW%G4f}8dVp@MkX|DD>)#M zr)PXkM_xFbr6_MUs?+)A!DJln-LI$L+`sw!j?+GaZ`f8^gUYizT%@&wS6>V&(k@Q!+qiT!3#Q~`d(B<?MBxqF|c>&(ADk3`{W(TGOnz+ z40JuS_&tbmc)R>#EP=}XbB^pLPY=AQl3VVc@KCF(38~tWjo+M>cNKrNQstb9^ z=zl$a1lj#>$Jke)6`Ffosq0%=Ie6JUn}|&+Q@ouJ0J^^Ssy-(-{h956+qY;^>!%b_ zZp~mIJ96WE=u)2gJ?^{3d(5-VCFU(T_O<8|y_b{v8uzcU*<)cc$hfeInw?g!gz+jb zcP_T8EvJwOO`BhQSUqJI!<>O4rDCnkW`dy$_>lP-H?88oUJKE%8YYZK`ek2CsmY7uqNs zlT@U^S6c8-39`R@9X{Is@*TfuTHtBm9zlDzsU4ibnM}uZJ#;O+gK$L-P{Rdp#@<5W zda~DUjHHUXe9_GF9&~`LZ?<=O1)2pls4)YcuolQ|(q*SC<(%rP5QQXn@*Kfv%;&@% z#6x^<=SCtL4ws91GN|^XoC12J;(7@GJO)$aT}fq!k-!roqb;=AWVhHSVsL|Hd%TqH zU3q245#vUew&9}6{OHn^j||_}_>!&BL)tpCwhbZoK9*DSV)NwllpU!j+^3ceD;R_{ zUlzwzjdY6vVr#IoJSx$z+OCZqZKPMNIpf`35w{aVu*lbuB-@L6K7MK8bHSyF8`0)I zxo<>vPJW5@>p<4%Sd~G?t=~&>XY*y*m>uKCurO+}O_5`XrGY9pf~jCJ&6@VC>_Z|Mg@3_@4pI*l3xK0 
zM}98S-{iEfMtWyJ6Bs}Cx6l4jeEr5m{`v;tPFw+iZDtYuT>dBmYMtHd4`|k0kmuePMj9cA~1&NfoxuQ#$E$y<7P1h z;Iz5#^wR%yVfgbFLVL@M;6GmK^ywJ{c}sIFQ|;?!NtXu2RWnCC&-DVlZWZ`kVGQN? z=6v;^iSq5HEN(Nb|M*$I7o6Dza)=PKtrVJsg+*oO8uxF$bRBTk} z^&#ZpM8nb=$xE6pY+in7>MolS21Y<(GyQaRNHQ{d%py1eOt^8s3G#gBF+oVQ<*j6h^1PzO|1d-Nfc>Lr1I z0pIuT=7B^f!W7(rbzsDibN15MD}&2-0Ybh6-XRQp$`hVd;r=$ie<_x zW?MGTODWky#OOGYZKO5Hj1-PA8d6=$E3jDFJ zD)7dCGEIJ$*2rV~=N4Q+T30l2aQ6eQa+`hm=R+X><2U`;2M>4+i<@)M9)gLK#?;H}bOK$My+RumdPa&@j4D_%34w?S-Gm!85D**j*pTGZYaY8 zP=D=%R&1j`1l`{QF5HK~18m3a-#b44G&mL!Nhtj!(&JEItp2?X_x){u#Oc3XS4E8c z(}+cbu`_}{-~ZD#n5 z;o2u_##}-&eq;@67vsR+x9Rs^jfajPGfAlB`qWa~wSV62Kc3}}g$$z)QO6Fq8vo0x zeJ0;@;@9%|z)?+vWr?Z29De|tn?STe{hv2_i3xL5;XiHMpS#kI9lqTusL=k`cXpHhWlr$NefT2;|M81zL*bWu z9wk`+{}oIJkW{KXbTs7m#tqoS?)}S*=Jyx<+r@{*gDcN;K|4E>m+1U)rN938-<}xq znu#;8;SEc!T>A44MUx9>{{10-U!ni}_0#h3?vsp%S6Ko~xXdPhU+RB)%Dze}DNuulM`cATKwML>9Zq zh3{G0-mSk~{` z|DW@vUyQT>IO?uubs9eYeUY>g*pB7D|Mu6-{Ns{;JuzAic;G~Lu&l-F;{7ZCJi!0j z$v+Fmzdi@KxSy$K2$Ff6-Kw6v@@u0a1+ZFE(*KnDT=0jVj(J-hj#m#cjMe|I@jalQ z0(Pp`yQE)L)sJf?yoOnYoF~7R-H$*1eZ}}=*Ji+%KbBL_g)(zV2NQwrKTUt|qk-Yu zrtV(AcWefjhXLVXT@%FT|8Z??c%MN~Kw{YuLxj+ACBRRs8GI9%8$f5Eyb59_5rEF= zW|igkwOj-97%8!d5N}4rqGT54sFA?b2j5SVZ@!MoX)~Ou)?<|O^XWV2-Z(rTIl2e!+n9-y zFv$_!&Hp3_kYCn%fQ4tla|0il>o{Ee8DK67a8r>}bn-@es0UMWkV#AmHlAdXi|a!W zrN8NtRQS%iXn7!TPHGWjSl=jbWjuB79(07&D5G{A$y=Na_GTL6uwI0h^`fAk7s8;4lF zsdX1vW0N66PIb33%3hzc75$p_=CjRrvky{Vo5V+pvo~v z++e`*{$>hX;{AhZ_+?@;pl{UA+YkP-5qp!+rR6C3GQdK&j)jA+X-}g6CwbK5L~N29|5u`~$KFj^kGf zi$zb>PlJ{DEI?3u$F0|?GUl#=I=G~ZD=i2nmp<{F1CgL=O{3t-7h={o;J9RPIsIg5 zmfC?)J0pK^4^n_x6CO7}7i&M-ocyF#ox9Wl+vYBgd z%k$@z=IGbkb%-4iVozm@6sGwz{wQXRG}ojs4dbyVM1Gv#0n?KYsPdQlpz?d4DNEM; zY#rwQCgSoogxfB%6~IRM>Q7TwYS}@pnvoV#HM8>`nnK+gmOs|y?_2x*JjoX#=;I{4JH>m z9x7Sa5%<%3makTl>@;DgT4AFJ-GJLmTxMYSqxPZ2$M)MwdlRI6UXl}rMa!B4j7*Fu z8Y*nQA#Ej?e7>F4G|0bBOIcEE#fz!VZNPUnycKcUYk~dql3ST9+aQxL^ZgVE46>63 zpJ+XCPk90!i?(tH`n0FKkE8rrx6eXyLQETo{0c%POoVla 
zLcI4ZM~ac-Du9}7Q2iwN_gR@^`G|OtC5G$ebDVm#r?oefk$Vt^FVo5m6Whc{Sk%`oHR1Y2(1DkVST~!C5Q0ET^Qq)=SbtU zhKN0m7c6rOX8N=Kv=1@|XNkJ{DtB8`x#r z?Y{t?&oStgQJVu&7ny`0Fy;V46Mv5>;PpRg5bDV=)&Pl_J|-|l%nS+Afe3#v{tZrt zVcW*uenb-r%P;$#H^RI-zRTFz%2W$aO+s{@B4RAf80fNzl-8_kE!iTekDTlBKd`!-CNAUc{BkhgA)ay4e}FG>)3cnP0ttizZ=rUdy0rYG9EEKLMhLIm~`02HPNA zju)SRoAT;mcKxA;MH(kwam-MKSC}45j5Wg)C|ZPXD{DV2jp%leBb9AH6=7mxHQz?a zs=d&XF-M>jP_funBWxB(h|>BEP~X6-5EsdR#(_16X+4-x9R;CYiqT{6^ei~>;Q=XX1PI|@;;PlM* zKB8Ke<5??PXiGL*zbe*sK7~=^*b-dvtmkV%tIi%rvy=Dac7_ka&;&s^!M=KQM`@N5 z!U**E>h`#8dD%35Y6rmnJ#NOlR2;hatQioZlOYM)MKJWpoj zx$8>&?C?F;`SxJA?Ltkd^UYq0)5EuT@~A;0bdxsPsUmH6ka@`=>Zvda1Yfe16F1vv z`&^}9Z2SGvTi(y_iWAZev`5hm%ULR^6t=e`Ar=`ugCSdyCab|bgAxfT4e0_vg&H}+ zgdx;MO>7g@0yUr+h@9ktJMHh@eBFJjRV#7o%K@;mv`qsGo_R(a7g(IBn~W16sRF=A z&0{^Pk^X{TEb(Q2;&H2}mEp}ZAh2X~@br)r-z|?-c-{FPZ``cgd zs2y95Z+K1Y%aj+&=%(WFTCA!${V+@i5uP>i_ZnkQG+;)ul?qLE1^XpT3Dx-`nkJ56#3lQs^x|q_oX$f; zl3bhoIQb;qKq!AyFylcWiL8V$4%hsu6FX)v#pXzlL;atz6k`0}+YEyO5dZtgk0ZHv z05NHbV_6rQkV3tk;KeD(8Vxi;^zJi*RqBXu5H(P9Jhv^rg4KCgvsZXDX)w(o41lrI z`P8h|iICOqo)JEmvLBd5kll&E%U+suZ`}-7p7;2eu=#zs@rD4HM(P~&Y6MzmqHn%* z1=eW#Loz-2aL93jIFU>K9$YvTFRe9AtfYy-Cp__v3H_$y*RyV5(#A%_waAA}u@ z?&_C;|AMvLMf0%BVC&8{B2D|Q_UPS0d>@1b^`!*Ok!prRW&()c^9-AO?zkP7HrdY zhX)D>a)=e!gk^wsg!zTi=E1|pve>1lujgxSerjQ6W2IqZ2ucOYtErC4IoW=^{u|YU zAr&b&bzn}68-e$n%x;tX|{3PNw5a40D_fM9PRZOWVvpvb%4U zQr&X-?15zf>-yXJocG(dyOPXg8IF+j$E8r+sIauyU6S*S-bmU6Rl^P%SWHs*n%`KD zcHm(MSswN@>!S~evPHP3QihqnD{96|V!}aq9RjIg4Y!KDE!pQ`wYh zIg#MtuBkLap?A3TTo`Y>b^By`PuLxVe%eGHx>(E`>9KL<+dEpDTw6v9nJYWvPyGZTV!r#e*7UyI%wFL_GIYLP3^iYA5))*Fi^co@-!Q zNj+F4UY10=H7@;Qx{xc^xUfxA&<%|CQJ9S&Jq1-7s}0>l!LoN#vog89jM|$nHx;C5 z320Zz4%v7Sb2$xJck@0ZEbkRndoPNZXSes}?5c$NeR54zsX$6jX-(G5K z>S(_Uzgvih@7N@HFUm5FxUh1-^<5x|!ricQ%j{7-Eukb&5{pe4#~%9y;CxS?z*Ii( z-*;R*^a-d2uD!x2l|r(^D`be>m4l+*{2WbF{dg}0GkVh#R#~jf3l^7|;Hx9@SFl`F z_N_&P*u4tEE^s8uTu)BEi@F>hEK!3e7d>Nr3X>9>GZSaTSgRd5fKYK6=Rbq*5j#2S zoGRB*J%`L~hoVv?`K*LQ;#tO=!}UuS{B-Zu3&)HgV!W?Vu6$J61gduEwNZU;^39#d 
zA_{+8i~wB|(bgm1{nxKe4ST7oF)0WxSEHCqG1v|%5V>9k3Pb~GKcz-yB5?p-GZ*$% z{SAHdFeSuv&hK@PN+c?9caY{4*uLS8+dsU=CS-tX@WPAKSIb5!-M3$Sr3`u`I0|e* zBZj1t!p5dUTS0yiwyhP@25(s}oOTK1iD^jePka5)l)X04IL`99SBqB~=M}Zdt7Edh zq25-n9)1}1Qz%lDY&n!o!NgVWc!EOdrsK$XXJwYw`>;gxOv(uLL?)F9SCl*Ny#`}T zz1nnCqw#v!da|+m*V&zD+1tI5xZxY@zy=?_NhbfL@!Q95m}7h~UW~~m^UEiWj>fnO z8!J8Boo$sE?X1K(eROOjH_3qS!QR-Tn?n-s4M`c;GmhPT=qUA=#%tavsTj%6eO3+M zfX#Pznv0{bWCmMrbPI#AyIfL#WR~pFq^FHg<||md0fl6a=ACZ`jo%{c$%Nqe_ic7q z9~`^V#_+?F?T>2VnPSuE&omsw(9k_)nCcjqfWF(=s3uTHn&!8O^&QtvATHG_PJVzp zgEmUwE{Tlt*|)u;DqClTzLh~!U+H-BuAqBx35;3NZ_bgx5<-)`K5n2?tAQG8aVKQE zRUIYjFwv{FRm8iFRu6udC%XH>ej_NJ@cuMP=MkryO5VUkKZV`KE&5nKeXlD)I``2T zV&H+a;#dD%#PV((sxeMH;~9OIGE=K2YT2?b?(nRs=o_d47`?dFLlR#pr3Z{Pcw`_1 z$fGZvGw0t3l>}p;uAC#lrzN>UHKcA@WPOq@w1gziL9|GFa{bE9QzUNIlO0~8-KC}a z*~BL0XoHli%HHZ~9W<{r3Ct(NJx3m?Nl$1M*5|mQ+%-(jnt}_#U41f@xzqd~YDsO+ z-=K9Elb}r=>k=O;1~O;kN@Dp?8683SzR&-w?7Rc9>fb+}O*Un3(Xf+|y+@QZWOYYI z_6T)TWM_{wNJc_RR#_F1Eo5b6Jysb}?y@5KT^}v=Jm2r{`Ti$5=f2N5pL2b#@xETy zoy9AkY3xTG_c3?hcYMiNa~IlDk<?w>bNuV(2dYA|GuJz>r`N}( zi}m$IkZv`ZV(i!D?3)QUl3PzN&lQWbO1RPfN~nCe-4etFQ}oTpqRLV~&>Nm5arT@N zB#i^wNxr+wlsAU!?%f*}UQauMk1)qCN^U=9vy^vf&EBTCb?&2rFa?g~=tAvkWG<~T z*Xe_Aor^j79VH%l15oYQBi^ceJ2|rXoyLd<5+n;F)h%yYH`|_AWtKABOD@c;C7(Du z0o&c=(P#Gxf3f&IM_H9hV)D~ zt))xmyo|)#YqWBg=ufm?`dDbs7O{15m{acY*}-YESW3p)wXHL(k})DUw@hLf0Y_a#%!R%ZF9NUdP~f=p zIw*)5qxWh2LxNb6rVZuIARJ3|MfJ=xXAYrp-lu_tNUJ~1Nh zqtcS6cP5e0Vy;insd=dP-z;YS&E`00ZLG4e6^*vooSawpxhlPOH)s{>tL7V^9kr9ZpP<~--EDvYFUFG;NK9(Lxf9YX%ASU!l2UJ4Aes z2V~M)mj(ifcuLP}UGNc#5faz#78CA!kHW%Zd#>p|b>$kG2#dO*u3#2AyJ>hx0^w%O z!bZm4FA5O+n70lbyVoI*os`pgi3-| zfI)9}EE%QljN>CR?EL~cdv;@ejgC0h&WO`bk2gWseOMw$DslQyQly|WpKf?D#=Ri&5$!s5w7|%2 z1Y1~TVk9W@rmX3vQ_-;uy3qZXrFCqYJYVdOm2j&4e8s*@k$>q7j9eLaQtgk3wse^24O%nS z5$M;?zNImG1tW-1IU2h2O^;q3(NQ;_35aOp*CSMinE8eDSB(hGb;>>X2SFrPsml=-yP<~upe?K z1gFI_J!^G3`4b)Ju=gUg!acf%Ybe#ijE0W#Yb>mOZQzY2#5{a7%x8lV6QmfV3G}f= zNlYK}7Q(}lBAEp^=-kP_|Malb^ov)qq%X2|9?cM|oq<(qBz4Ka-=OFnQtn%>!~G2H 
z&iUd7n6_grd_hTFm3Vz2<-=k^;`P?P_uKP&ktA}hs*ic2c@j9dIX1Kzokc2gJ<|nZ zuR~BhfDLeUBhXJv6(SFOu38(@$FL<3OiV$?N|J8!WA>#CX*%u~sNEzVl*4K|Y#93% z!v;f|jozNCWydG5mJXvS@Oo>$Df&f330XP=-44^|aw6jn z$?QgnPIE{;Qj@OE1z!#koMoqC)g{+R>dh^gfoRt6o|JZU$$r8cq?m`(3*hGmdwoTX zIlj^{PpS$Gyhb6-y0)tk%qL^cyS3Z72rGbvw09L$(>RM?TPJF3?sF`K`G*_dop(!;)o z_fgN@;bhcK*(Rq?yq0C$F2g!s0;}=P+%o=H+Ef!3e3q7~cG7E2q@>zS$6}Cvb8zoz z>PsH<-CAw1hK#mN3_&T35viH-0WU!$$FI-fKZPil+i=Y{W^@W1>+Ztl87M~2SQjoW z=N#D^RltQogY0(M%GT#*`N~`}?uwGBRcl8|+e%)6s?lAkbtwoU&hoSppL`p`?%;SI~X!Fgt3w<*ha}!VwaYnsxTdx(OPFe)5*|z^KIkPUG%D(7bvF|W@?*H{3cg*}#4>;axI=F18Wg<0FL)iGl8vsZ2s$pw-VejqU z3=>>7#?@)3J|#1h{cTO5B<9b$F$?O*p|yL9D&C-0La_Uo>-6TyxZHo6^sZsQj?$YCriDRydcJ~Hvd`v@D7dbN zKLnxD-H9H4G8&=>fQvMT)?qSjKL*#vpxvC|WlEX@`(ULIXYvKOb!d38=0OJ^-&THa zVA4|`t>>9jX$X$QH#!@~CA#ah1(1=*VVE#hiQ>gJ*O0&~8n)-lqD!iidacBaxrc3ZAN%?ocAc|WF2>$ zLG@*}q^Cnrs>M&H+TSzTg91HYMht&=rk>ZUl~-#0CjeJ!ql68!d4Mqw!JsKNsM$W4 z$3lQ-Bt7*`^4f!CgXqY{y^|zJi>ov*he(&1?=3IZvhKi4%-u@RO(}}Joc&>eREXHAEOFz&n>hPZEoz}!gli#hapU2K z_tRc`tXv5R&YUf0V!6HX3vegd=6OZGY8QD@N9iQ#AY*K@pFFUI(tsPUQ_T^=`_ zf86RVOqKgocp7&V_b|60l&rK{&vZjm_G|mHx$gUUbNQ?d4?oU5l_LC`)4@V$ygjIX zSB+as-|AvX1v@;}N)u9%IL`!@enB3!J5(WU4dRfI)x;F{<`&hA?kBV=o5{I$U7FC( zm3hV)N-yol0z$AT^U-jDZbVSMY=X>HvR2u;H77`<)p{-1WgeVmV!bDVpM*NThLv#-n}o@6hPtLD zg0mb?pJXJt%A+0^BP?D-wMn^kndXkrlP}NIJR+Uzy_yP zk)=WBrJmGk&UEO#h8Tv!VnpmCMb{14aIlYe9WaP&qNCLprsz0&Vf(#Lw3=+Rb8|r@ z%w@L6$zQy#6hF#V1o8mM`6EoRT%&4)0tGqaEP1MpYia#hc9?S)fybwh7Zi9C!ZbwU++G+{@=Q$N;7Le-Bl zlR=l?1WHD#wA7}JayZGO@AN0e-R)gXb3ACx54NwX>{nlr9vaq4G@VnsKG51Hd|Fl@ z#hlJ&p_jBB6%-K0(c#UIuYNH=i_Q!CxH|kEe5))x3%+ zd}+%2IV9D_tAnrnX+cVfsH1@E)GAR`nBXg+MT4-C8XnJ%C0Zj+Q_I)N!# z(vF_ueyG#UH;cD}nbEl(Q$~|kYhKhnwbw4g(PJQ9%tGbMUg_e1T6@%a-5L!O&pTfp&!Mxvl+_t%*tC^-z?4Z*_;rUnys6=(j zq9kNZ&u1P^ix{bAAIuZ^82&0a>AW9;QdOBtw=PrDL#OL4Y{`2sP!{AS))D@(2%(fx zMfkG~2jDcIIuyV}BDtE4JYJwSA_|m38IC{%hcYk4&tllZuO`tSr>t|+^f)tG2ct=L zuIH>NCLaOq&%}eIxz66S&I=!_w&(GsSlfFAH8@UX+;~}7y)9L7r9E2Z4u``W{xwNc 
z;9`i?v(Y03L7SGWG=na6_@HfOLhx}N28{2ghw?gY0ZM2rqB#su*>jSC89L-M2N-*5 z?%pW6t>-!iiU-g2Z_N0BI=&=(SHaq$$dl~$ZCtyuOzNIPi73m~7*tdCCs|De zO!N%~{bgc?5gQSC#Uodq;Ub*_5n>532wO~P!p}tJsS}PJ<6#gvtJvM}DhEb3?y*kG z(8sY=l{YB)DGrF^%qvk*RpB0t0Omv>sW7Zngt#!w(Zyxj|5(6#Ntynx6Pe_PluMu6 zzCvUoQuzoG;w;|l9n41DihR*(L<&cnTBUn3k}&AJ*k!R}|4CbnLS|vu*c?+}%$}iE zllm$eH9B>*(}-M^e9vbDWk4(PAL5}YPWy_c@krQW5+r;YpEU1n3Pg1=HfT}KsC9}~ zT*q?uqd0k=1e3_El7X{L$QjIs5nuD#M=d8{dRW*MtAhoM>Q|Sm3--edp(-fmF=%vTKKcXYF9kqkw zS@eY#*jKXjWDuP{%_evj!+Xq$LBG3bPt#jaRovPxmsPd=-Yb3lM=l_L7%a2M@iR52 zQ5_B#6MTCa<=$C`jyxV>+`VhR{2VB{40h_aLBk4x~<?DX$Or-RV94myN zsOw<#u6F?Hq1OIz3QMpbJg_?t<2Wo~1a4ke(Pgc`-7ZG)JPwE^izI!hTwc}cv-rpb zhWikfLE|$|6l40v#Hcs2N3TK_-f08@CzzlzXmGtbf8WlJN0xxCq=;0L=8)cuC{tVm z%|S$_Skt~Mw9C-Ip?({x$>$4mmq>_lKzmILMD%`OS1OW{GF(DL4bV68?Kl-&zlf-s zmEo?_a?mk12fG1rS-B~MjnI|mv`gZeJ@a$5haYM@8jZ)wKY%vq1N{QjM}szmC;ei7_12L+v&wMnG2R>MSid?CehfuM>uFcQVHgBoZ;$;f5J zFXpM#uMQ6Ik%i4SrCE%Dc`_NgkHjuqH_M@lXl(a%(BcH-kyPu#=1Z*Wp7W$>y*wKD zhP0@Z*flcea{MQmsm29?Q=bo%3+jJCfZL1Z5%|O-oHesKxsn!pIKiKWAIfCY|81H75|v9wP1yc}`00fYLQ~XFRsDC_ixTQ^(XI0MK%1`)GM6T@8jjIvW(8DY6`{giIUyp1Yqafx z*@z1M)50OQkyY04&q3ZV-t7^XJN|q6}g;Q=V z5(EHI3E~j63 z7i5zHdj*{2hNGPFX&nLlEh;`%W)et6mdZ#bT67}2KS&g$4*c0Sg?P?o^Cy@3xsbKj)wE21h15vu zlX7sk4N5*qqLB^x@Y<5poJX31Z}KHKM23DvEyk2QLetIX$aPkyqIstS>}I6862ZMR z7aHX}`*jl->eryC87-A*{v9wMj=Pc zP0S0B^BPMJ2PNHqg;k~&^5AEop^dV%CNx~@Eo9d2Ha`vs5%%_OxL*yrLa_K|=1A<= zZ@dWKxU1d=wqpDxOAW%X#x-N$%desl(gL0m!r`@$N?|IkxW{idRtILVzup4F8s1U( zt=zGpFQcZ(C;QD=QaTSwz#eu8-Zy%{b2)f@FG~lS=Qwo3g72$mOHRF_@=zg`=scQl zZ1)0oF?p`_EaGtoIba&c_BIdTJBcR|j(*d5<;jJv7X`7q z#rt<3a9EhXG0XDw$!Z9-!>o)L)md++69SM?`+R>2413;_!EDx$h5px)WmEjKXUJY% z!Qsvs%auUtsa9X22Yc%lcwj5(wsoJ_f=esx7cqFUMGP}zEyV^bV>{Z$2pzulN^R_Y zKW*Md*G1}1a5UdFR@>b{^`*k-Eu;G}RK;n8Wxnb1Trh7mMncFY;bTXq=;;0&-F0Kz zIDeT;yPRvqx@?^@!O z3=BN*`gRiO+5xKre22om&K*PA$Gb=NJ!{%`-6}K9u`GS4rLnMa*6yTSk?Yf$k{3=H z9eo7_Az7vEPwoauM)rk%Dzcvmn>~|v`J4@`JH)(njEPebkV`49j>Git`O6oXTscI! 
zq8t~z&gV@jFzqRv6#dR4mU;&b`CWU1`JqpzzOI~*u`fhOiooL+Qr+ zq!E*`x|E+q3(VkIvT~! zU_8R^1e-av(HDdf4+K9l-4>5MIW=vo-j_$P+Rs*z$rXIpeIu-``BB!QZ`*z~-y zt$48nMBX)t?JMBTMRzZ(r^~m-aSn?So_)CR8)1B$e7 zE~TbqSGezYz6AWkEESc#CV9<1Z=^H0@d6K^0fo{a zmDK4;W_*f}1-m|eoQH&+vzAs}c(VxPwAQBU_7x!hVzr2F4;bl~`QWPcf_jeryju~z z&0KO?BX~r~jf99Mu9O;2A@HQ7$!gji^snRA2@oMx!5r+i{4I3PNcW85XMK(KnZFam zAW(x_=qCLj-Hk*=Hwvx4fCXSjm1AG70_&=X24HHYp4v5A1|S6ddYH|av;~v)b$q8H z8PJBa<2okO2+sk;ARlz6+YWMM@XDt(+w2^pu6kUknGs=ma(m7Gm!CUA8m=oOv9V6w z)wR&V#%df{RA)T7ZJLa^;#1OxZDJ>64rbTqc5G+zXP#u8Wh226o`Wu?8+1k7K2=SO z9!;`4bcBkIRqT>WhMe$8JnoYf4IHGy#t|;h*bi!l;~)$kQ>o?hI=FNEX6qHI=p|hD z&BdEKY-7~#ymaJoY@Z+NrHbtH$_t{L4v7B@YsK-!C!u1~Zx&;MwD)(`$+yY^F|!|W zja$HjAOqAqt;9%YHZ5O6REBN3&w(#KihP;}lfhN|3F$?uJ^=hf?iLoN#_{->U`$3| zK+&)a?|B!vT_=`n7bXB>=K?BMb^k1{)3Ps=N!{C-EY-56nXsW>K_UOAi>R4Z{J&?0ZO(}@F0eia0P_wB;_!y(6_3Jkm zL&l8aKq=Poq5uHkCn(UA%IQN#DdJGL{Q!E+{C1ZxD?aS=qX|=p%{`y9SYwL|W^|s& zF_Ay_=2{%%kgn`p=J9uIZ-=;y-h=nc>ABE6s{5o&Ecb|QPKnM7=N@S-S;+yHY8SAj zB;7L;Liw5$;gh!t7NKb;Y&pGiOkd19s?HYN`(iD*m+_U$l?&8Q*%pbmq zXuIPSMGkh@3)!OHd!@$?cNu5KYco1Cm%z~-)8(IXw_1dRz;cX&6lk3%DvLNUwkCl& z;1DA=@yqR9ymM#Rb_$i*imWB9-a+A?nnQB98eh>Fn7T|Ja*xKH z`$?tSHvD)w6VH({(KqT+R!DG<=SsXt>ukmuOW`m40ntUWb z!0Rtls^zzr6B~pqGverlU?#`P!z>uAUM^SbcwxFRP#7M8n?p7C=8eM8XPPN)87_)- zG_BKYS{lQ2CVrZ}#ytz^uTqmmYZN?3BG04t3DLu?s71l=nLE&iulj`QFzcRQ0fVdA z8Q=aPzeU9;q>cj+GIKDRgm@I?kUj=_LdZk+oI3;>D(ms<+4L?Ia$Z2wyaY~uAhf;%;bGLD3n)wXjE*Wun6qInH?`b6HO(c&%v+-a#^9+k3mwj-6UzC?~_ zY6y=$-)s%X1pR0lPgjQS3e$wb!nic=jzH?>0?kv=j@(&XYpA(Tni09jSVLN~?aSP{)_mbDL{m)ZI%VKM)(NY&IQ?GN54ZS(3B z%-nmoJ#Xp}8$qAWAd{PPD~%G*@HmrL5XaQ09NQR_a)o7XFx@cBI@Qe-t`(?IEta|t zgM6rOarSxx1uji&1=4{&4l>5Pgtrx3#^f%u)?an}Xiomzn0jTT&4Kpz1MS;Uf3AOz%x1=5Xr_1o{y#ZF9dy5TvVT_s z8AX0;WDO)aC8wUbD`j+?NNwoV@rk0Wr@wHkHPCe(=CwSDCR;u#Tpzn$X;>+(>hqE7 z#dO^5VX7XoKHotY{{%GgiVj-tvpLaCX+y74_L3oab`>h!(}XTH$%*-nH9Xcceotif zTq1Eb3OZ-boDN(6I>kG1=lS)syW)Zzt0sFQEKDs{CSFoALmkV#^?-b7CRbIH-kK3b zl}=rL8fICcY3llqSd5+9?C)J+esoa0y5`$;P&)_8Ty3(1G~8T^Hz7@|a>TSfieZ$D 
zoLXDgsP3w@ZgtQS17_bS&H}k(rG|Zekx)_+pBrTs;Xs)%CWVXG5F$oO*01}EeoOMWAZo^F|3ZJc&S#}bjJ{212MU*vQxY;l5I z9jel5OkX}>cJw?VS1l_XzDA@>H1uXJP($8iWPwnhmuDBo#v(E3A&t&3EwQuS!gPGZ z-6Sk)O(*AUJ%`BfehlgL7kxT?!jCAWqD1Qb$HKGx6#_NyYLx0bN2xa30um}R(5vtf z?S;Z1w-hg)tnn}KVttz7elljNG^fUy(JB^96IDL1f=!qopSWl(H~b}4;Z^tw_jPqs zjhplSyROM}axhE>#RNQyIn?CydVXA6xG?;Q*jtxm?px}*bz%oH!_k<5@z_w;(4duC z*P?Ee81XTanvxezy1Y`E@QKZd?Pzi14LBOCf}BxZvzBI(9lOp)E1|186&j@)iq7s1 z>E02$Z|%JG=vE%vr!otzQ)U~lMYfMC)6`Ud#Oqs{*uPGfH}Hf9YSg<#9Tcd~EzK?J zZYd7<6z&!PFGFdASRQ~B-GYOj8D#s^bbgiQVB^gFgj+}!OPWI%^y{HFUFVNSnF!QB zFGYiQ2gP03GpQdQJ}+Hs+hM;#HMYf;RwZL>%scRMgatSi-t&0RA^Pf(zW-z$+}`b> zW!ZRqQf95Vh&08ovt&we-Y_ zjIVrxXox?c|6z{)Z=u>ZH|vK6jLXlDSG2KmyJO>~q=9OLZws1(Cdx%PSE}Mz)TYEA42_CsOjSP+*UJ@YY{C*BYOfL%5AyoorzBQbooPndtr88lf!r=e4TqjipooYs<{s^%+QpmrF%oL+kPih$R z<7MAC=iv~D*i_DVK_6v%yO5t{fY1P}CZe-F zi3HbHq6!51r=*e)`0Fj1jq*QaPqx$Q&OHXz1kQfF!6n zbZXf4MG+HI`=K&08^jBbwIOrF=~angz!6fTt|HXkd;bv zNmuYnFDOwkN<6FMlWp|KW`ncv1nsTk9WIL^s`}2F}#pBx|FEebefO@X22Z zVx+H2m%cJugOD7xwO&S*KtU)6AYdsqX$GD4po0ir&L6iH1B zVUimmln0830<}SS2Xs*aS-{EOyGSm78mu%7Rw6_^laQ*n!b7r0>f{9v7VB+y}g4V80 z=l}8Jch-`PINr0|^UTaZ>zd*r_>FT0r?0GBz+i5qSOSBgyZe}#wR=%%R8Rg2ozkS_r=lbXC7p)|mv+?6kG!?N3+ANoPjof~HNxwt%`EtMh`SW5nedq7z3r2I~Ro@;4 zLo&b9Eab<%-Sj;+-{wuv^ScRXV}j!&UB!Q=CyI&^3LgIld$h4bZ9WMt>>)D#uT{{! 
z8mARC{TIi4KO8+t7f}R-Vv4@$v5Eb*@yucf1x^0Hb5+tH_8TdiW#TuwP!So^uLt<^ z;r+bNKY#OW0Rjc^q0q5kQ)GU;tsD26K(CVJ&j zh%WV?@lZczKwh7Mn3$8ft!#T%vA!3$b z(T{&s>Ad_Nr!q-}{)NxQo8XIJl$7+A{P;htT8(`+-vBh)E&0`@Xch9GIT12}q|C#A z*fD~Bl5vqsf0{O>iIqwS{)cYvEIVmd>(lwzhu7YyB#qjp+Q;(m+(&mE&dJ~j-jm$P z@BAlZ4Q?-y_VWCz^}F#@e%>QT;5p|y+DL37t8)20B-H(JkAK<@*#G`So5d)V&`f4w zoxpKZHQai7Lc#w8CNnu;2HFtYd*uc!pFTSY*xKp*zTCd0dD@imlP)TB=;HCaP|)G2AiE_O+F4RtyenO;ZCB&Hn}4Ahe?9$P5@JnGCg@DMqn9IlCEBOv z(Mg8llf;@8AEiBS{SCs2)4}M8Py9V2A1i)k_}44@Yl`6&W(R^ z4U!C$=>71D+GYsKnunR~q(tQUtqP*4S`?;TmrDD;(LR;zSAFH)*#24uN?#S)rY3mV z8CyL*1#tergxQeeMg9+bzknzuTiigZ{q604Sc8q%Xdj;V&pZ9o9&UDI%(6hrBK7Km z@9cLq0|h01@sZ!3Dq25(?t9b%_!yCS95dT*qvjP320?sts zBx{4G{Kxlx674JT&&}gMaD7TQkof8iNyGGyNJCVb)KKSNK7PL;B+Mv@I+JaDhy7n3 z^V@#(2ag#oXbOJ(?tioI!+$MA!yCH`UR@zH)={$m4fk Date: Sat, 21 Feb 2026 03:19:58 +0000 Subject: [PATCH 07/13] feat: update dependencies and enhance configuration structure - Updated `apache-tvm-ffi` version to `0.1.8.post2` in both `pyproject.toml` files. - Added `pyzmq` to the optional `cuda` dependencies in `pymllm`. - Introduced `pymllm-server` script for server launch functionality. - Refactored configuration imports in `pymllm/configs/__init__.py` to streamline access to model and quantization configurations. - Created new configuration files for model and quantization settings to support enhanced model management. 
--- mllm-kernel/pyproject.toml | 2 +- pymllm/configs/__init__.py | 15 +- pymllm/configs/global_config.py | 606 +++++++++--------- pymllm/configs/model_config.py | 31 + pymllm/configs/quantization_config.py | 18 + pymllm/configs/server_config.py | 266 ++------ pymllm/engine/launch.py | 116 +++- .../scheduler.py => executor/eager_runner.py} | 0 pymllm/orchestrator/async_disk_io_process.py | 3 + pymllm/orchestrator/detokenizer_process.py | 3 + pymllm/orchestrator/model_runner_process.py | 3 + pymllm/orchestrator/parallel_state.py | 122 ++-- .../orchestrator/request_response_process.py | 10 + pymllm/orchestrator/scheduler_process.py | 3 + pymllm/orchestrator/tokenizer_process.py | 3 + pymllm/server/launch.py | 17 + pymllm/tests/test_vocab_parallel_embedding.py | 24 +- pyproject.toml | 5 +- 18 files changed, 624 insertions(+), 623 deletions(-) rename pymllm/{orchestrator/scheduler.py => executor/eager_runner.py} (100%) create mode 100644 pymllm/orchestrator/async_disk_io_process.py create mode 100644 pymllm/orchestrator/detokenizer_process.py create mode 100644 pymllm/orchestrator/model_runner_process.py create mode 100644 pymllm/orchestrator/request_response_process.py create mode 100644 pymllm/orchestrator/scheduler_process.py create mode 100644 pymllm/orchestrator/tokenizer_process.py diff --git a/mllm-kernel/pyproject.toml b/mllm-kernel/pyproject.toml index a8dbd98e..77340b29 100644 --- a/mllm-kernel/pyproject.toml +++ b/mllm-kernel/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "packaging", "torch", "torch-c-dlpack-ext", - "apache-tvm-ffi == 0.1.8", + "apache-tvm-ffi == 0.1.8.post2", ] [project.optional-dependencies] diff --git a/pymllm/configs/__init__.py b/pymllm/configs/__init__.py index 86af57be..a23de035 100644 --- a/pymllm/configs/__init__.py +++ b/pymllm/configs/__init__.py @@ -1,21 +1,14 @@ """Configuration module for pymllm.""" -from pymllm.configs.global_config import ( - CacheConfig, - GlobalConfig, - ModelConfig, - RuntimeConfig, - get_global_config, 
-) +from pymllm.configs.global_config import GlobalConfig, get_global_config +from pymllm.configs.model_config import ModelConfig +from pymllm.configs.quantization_config import QuantizationConfig from pymllm.configs.server_config import ServerConfig __all__ = [ - # Main singleton "GlobalConfig", "get_global_config", - # Sub configs "ServerConfig", "ModelConfig", - "RuntimeConfig", - "CacheConfig", + "QuantizationConfig", ] diff --git a/pymllm/configs/global_config.py b/pymllm/configs/global_config.py index 43783e94..1761697b 100644 --- a/pymllm/configs/global_config.py +++ b/pymllm/configs/global_config.py @@ -1,349 +1,321 @@ -"""Global configuration singleton with all server, model and runtime configs.""" +"""Global configuration singleton aggregating all sub-configs.""" from __future__ import annotations -from dataclasses import dataclass, field +import argparse +import types +from dataclasses import MISSING, dataclass, field, fields from pathlib import Path -from typing import Any, Dict, Literal, Optional, TYPE_CHECKING +from typing import ( + Any, + Callable, + Literal, + Optional, + Sequence, + Union, + get_args, + get_origin, + get_type_hints, +) -if TYPE_CHECKING: - from transformers import PretrainedConfig +from pymllm.configs.server_config import ServerConfig +from pymllm.configs.model_config import ModelConfig +from pymllm.configs.quantization_config import QuantizationConfig @dataclass -class ModelConfig: - """Model-specific configuration parsed from HF config. - - This is a lightweight wrapper around HuggingFace config with - additional derived fields for efficiency. 
- """ - # Original HF config (populated after loading) - hf_config: Optional[Any] = field(default=None, repr=False) - hf_text_config: Optional[Any] = field(default=None, repr=False) - - # Model architecture - model_type: str = "unknown" - architectures: list[str] = field(default_factory=list) - - # Dimensions - hidden_size: int = 0 - num_hidden_layers: int = 0 - num_attention_heads: int = 0 - num_key_value_heads: Optional[int] = None - intermediate_size: int = 0 - vocab_size: int = 0 - - # Context length - max_position_embeddings: int = 0 - context_length: int = 0 # effective context length - - # Normalization - rms_norm_eps: float = 1e-6 - tie_word_embeddings: bool = False - - # RoPE - rope_theta: float = 10000.0 - rope_scaling: Optional[Dict[str, Any]] = None - - # Quantization - quantization: Optional[str] = None - - def __post_init__(self): - """Set default kv heads if not specified.""" - if self.num_key_value_heads is None: - self.num_key_value_heads = self.num_attention_heads - - -@dataclass -class RuntimeConfig: - """Runtime state that changes during execution.""" - - # Distributed state - tp_rank: int = 0 - tp_size: int = 1 - dp_rank: int = 0 - dp_size: int = 1 - pp_rank: int = 0 - pp_size: int = 1 - world_rank: int = 0 - world_size: int = 1 - local_rank: int = 0 - - # Device - device: str = "cuda" - - # Memory pools - max_num_seqs: int = 0 - max_model_len: int = 0 - - # Scheduler state (mutable during runtime) - num_running_reqs: int = 0 - num_waiting_reqs: int = 0 - num_swapped_reqs: int = 0 - +class GlobalConfig: + """Singleton that holds every sub-config pymllm needs. 
-@dataclass -class CacheConfig: - """KV cache configuration.""" - - block_size: int = 16 - num_gpu_blocks: int = 0 - num_cpu_blocks: int = 0 - - # Cache dtype - cache_dtype: Literal["auto", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"] = "auto" - - # Sliding window - sliding_window: Optional[int] = None - - # Prefix caching - enable_prefix_caching: bool = False + Usage:: + from pymllm.configs import get_global_config -@dataclass -class GlobalConfig: - """Global configuration singleton containing all configs. - - This is the single source of truth for all configuration in pymllm. - It aggregates ServerConfig, ModelConfig, RuntimeConfig, and CacheConfig. - - Usage: - >>> from pymllm.configs import get_global_config - >>> config = get_global_config() - >>> - >>> # Access server config - >>> config.server.model_path - >>> config.server.tp_size - >>> - >>> # Access model config - >>> config.model.hidden_size - >>> config.model.vocab_size - >>> - >>> # Access runtime config (mutable) - >>> config.runtime.tp_rank - >>> config.runtime.device - >>> - >>> # Access cache config - >>> config.cache.block_size - >>> - >>> # Update with new server config - >>> config.load_server_config(server_config) - >>> - >>> # Update with HF model config - >>> config.load_hf_config(hf_config) + cfg = get_global_config() + cfg.model.model_path + cfg.model.hidden_size + cfg.quantization.method + cfg.server.host """ - - # Sub-configs - server: "ServerConfig" = field(default=None, repr=False) + + server: "ServerConfig" = field(default=None, repr=False) # type: ignore[assignment] model: ModelConfig = field(default_factory=ModelConfig) - runtime: RuntimeConfig = field(default_factory=RuntimeConfig) - cache: CacheConfig = field(default_factory=CacheConfig) - - # Additional metadata + quantization: QuantizationConfig = field(default_factory=QuantizationConfig) + _initialized: bool = field(default=False, repr=False) - + def __new__(cls): - if not hasattr(cls, '_instance') or cls._instance is None: + 
if not hasattr(cls, "_instance") or cls._instance is None: cls._instance = super().__new__(cls) return cls._instance - + def __post_init__(self): - # Lazy import to avoid circular dependency if self.server is None: - from pymllm.configs.server_config import ServerConfig - self.server = ServerConfig( - model_path=Path("."), # placeholder - ) - + self.server = ServerConfig(model_path=None) + @classmethod def get_instance(cls) -> "GlobalConfig": - """Get the singleton instance.""" - if not hasattr(cls, '_instance') or cls._instance is None: + if not hasattr(cls, "_instance") or cls._instance is None: cls._instance = cls() return cls._instance - - def load_server_config(self, server_config: "ServerConfig") -> None: - """Load server configuration and sync related fields.""" - self.server = server_config - - # Sync tp/dp/pp sizes to runtime - self.runtime.tp_size = server_config.tp_size - self.runtime.dp_size = server_config.dp_size - self.runtime.pp_size = server_config.pp_size - self.runtime.device = "cuda" if server_config.base_gpu_id >= 0 else "cpu" - - self._initialized = True - - def load_hf_config(self, hf_config: "PretrainedConfig") -> None: - """Load HuggingFace model configuration.""" - from transformers import PretrainedConfig - - # Store original - self.model.hf_config = hf_config - - # Get text config (for multimodal models) - if hasattr(hf_config, "text_config"): - self.model.hf_text_config = hf_config.text_config - text_config = hf_config.text_config - else: - text_config = hf_config - self.model.hf_text_config = hf_config - - # Extract fields - self.model.model_type = getattr(text_config, "model_type", "unknown") - self.model.architectures = getattr(text_config, "architectures", []) - - self.model.hidden_size = getattr(text_config, "hidden_size", 0) - self.model.num_hidden_layers = getattr(text_config, "num_hidden_layers", 0) - self.model.num_attention_heads = getattr(text_config, "num_attention_heads", 0) - self.model.num_key_value_heads = 
getattr(text_config, "num_key_value_heads", None) - self.model.intermediate_size = getattr(text_config, "intermediate_size", 0) - self.model.vocab_size = getattr(text_config, "vocab_size", 0) - - # Context length - self.model.max_position_embeddings = getattr( - text_config, "max_position_embeddings", 0 + + @classmethod + def reset(cls) -> None: + """Destroy the singleton (useful in tests).""" + cls._instance = None + + +def _parse_bool(value: Any) -> bool: + """Convert common CLI boolean spellings into ``bool``. + + This helper is intentionally permissive because CLI users often provide + booleans in different forms (for example ``true``, ``1``, ``yes``, + ``false``, ``0``, ``no``). The function raises ``argparse.ArgumentTypeError`` + to integrate naturally with ``argparse`` validation and error reporting. + """ + + if isinstance(value, bool): + return value + if value is None: + return True + + lowered = str(value).strip().lower() + if lowered in {"1", "true", "t", "yes", "y", "on"}: + return True + if lowered in {"0", "false", "f", "no", "n", "off"}: + return False + raise argparse.ArgumentTypeError( + f"Invalid boolean value: {value!r}. Expected one of true/false, 1/0, yes/no." + ) + + +def _unwrap_optional(annotation: Any) -> tuple[Any, bool]: + """Return ``(inner_type, is_optional)`` for Optional/Union annotations.""" + + origin = get_origin(annotation) + if origin not in (Union, types.UnionType): + return annotation, False + + args = [arg for arg in get_args(annotation) if arg is not type(None)] + if len(args) == 1 and len(get_args(annotation)) == 2: + return args[0], True + return annotation, False + + +def _converter_for_annotation(annotation: Any) -> Optional[Callable[[str], Any]]: + """Map a type annotation to an ``argparse`` converter. + + Only scalar, CLI-friendly annotations are supported. 
Complex runtime fields + (for example nested dict/object handles) are intentionally excluded from the + generated CLI surface to keep the interface predictable and safe. + """ + + inner, _ = _unwrap_optional(annotation) + origin = get_origin(inner) + if origin is not None: + if origin is Literal: + literal_values = get_args(inner) + if literal_values: + return type(literal_values[0]) + return str + return None + + if inner in (str, int, float): + return inner + if inner is Path: + return Path + return None + + +def _is_bool_annotation(annotation: Any) -> bool: + """Return ``True`` if annotation represents a bool/Optional[bool] field.""" + + inner, _ = _unwrap_optional(annotation) + return inner is bool + + +def _format_default_for_help(value: Any) -> str: + """Create a concise, readable default string for CLI help text.""" + + if value is MISSING: + return "" + if value is None: + return "None" + if isinstance(value, Path): + return str(value) + return repr(value) + + +def make_args( + parser: Optional[argparse.ArgumentParser] = None, +) -> argparse.ArgumentParser: + """Create an ``argparse`` parser with two-level GlobalConfig CLI options. + + The generated options follow the naming pattern ``--

.`` so + each sub-config can be configured independently: + + - ``server`` options map to :class:`ServerConfig` fields. + - ``model`` options map to :class:`ModelConfig` fields. + - ``quantization`` options map to :class:`QuantizationConfig` fields. + + Examples + -------- + - ``--server.host 0.0.0.0`` + - ``--server.port 8080`` + - ``--server.sleep_on_idle`` (implicit true) + - ``--server.sleep_on_idle false`` (explicit false) + - ``--quantization.method awq`` + + Design notes + ------------ + - Options are generated from dataclass metadata, which keeps the CLI surface + synchronized with config definitions and avoids manual drift. + - Parser defaults are suppressed (``argparse.SUPPRESS``), so ``read_args`` + can reliably detect whether a value was explicitly provided by the user. + - Only CLI-friendly scalar fields are exposed; runtime-only fields are + skipped automatically. + """ + + if parser is None: + parser = argparse.ArgumentParser( + prog="pymllm", + description="CLI options for configuring pymllm GlobalConfig.", ) - self.model.context_length = self._get_context_length(text_config) - - # Normalization - self.model.rms_norm_eps = getattr(text_config, "rms_norm_eps", 1e-6) - self.model.tie_word_embeddings = getattr( - text_config, "tie_word_embeddings", False + + cfg = GlobalConfig.get_instance() + sections: list[tuple[str, Any]] = [ + ("server", cfg.server), + ("model", cfg.model), + ("quantization", cfg.quantization), + ] + + for section_name, section_obj in sections: + section_group = parser.add_argument_group( + f"{section_name} config", + f"Options for the '{section_name}' section of GlobalConfig.", ) - - # RoPE - self.model.rope_theta = getattr(text_config, "rope_theta", 10000.0) - self.model.rope_scaling = getattr(text_config, "rope_scaling", None) - - # Sync to cache config - self.cache.sliding_window = getattr(text_config, "sliding_window", None) - - def _get_context_length(self, config: "PretrainedConfig") -> int: - """Extract effective context 
length from config.""" - # Try various fields - for key in ["max_position_embeddings", "n_positions", "seq_length"]: - if hasattr(config, key): - value = getattr(config, key) - if isinstance(value, int) and value > 0: - return value - return 2048 # default - - def update_runtime(self, **kwargs) -> None: - """Update runtime configuration.""" - for key, value in kwargs.items(): - if hasattr(self.runtime, key): - setattr(self.runtime, key, value) - else: - raise AttributeError(f"RuntimeConfig has no attribute '{key}'") - - def update_cache(self, **kwargs) -> None: - """Update cache configuration.""" - for key, value in kwargs.items(): - if hasattr(self.cache, key): - setattr(self.cache, key, value) - else: - raise AttributeError(f"CacheConfig has no attribute '{key}'") - - def temp(self, **kwargs): - """Context manager for temporary config changes. - - Usage: - # Modify runtime config temporarily - with config.temp(runtime=config.runtime): - config.runtime.tp_size = 2 - # ... do something with tp_size=2 - # runtime restored to original values - """ - return _TempGlobalConfig(self, **kwargs) - - def to_dict(self) -> Dict[str, Any]: - """Serialize all configs to dictionary.""" - return { - "server": self.server.to_dict() if self.server else {}, - "model": self._model_to_dict(), - "runtime": self._runtime_to_dict(), - "cache": self._cache_to_dict(), - } - - def _model_to_dict(self) -> Dict[str, Any]: - """Convert model config to dict.""" - return { - "model_type": self.model.model_type, - "architectures": self.model.architectures, - "hidden_size": self.model.hidden_size, - "num_hidden_layers": self.model.num_hidden_layers, - "num_attention_heads": self.model.num_attention_heads, - "num_key_value_heads": self.model.num_key_value_heads, - "intermediate_size": self.model.intermediate_size, - "vocab_size": self.model.vocab_size, - "context_length": self.model.context_length, - } - - def _runtime_to_dict(self) -> Dict[str, Any]: - """Convert runtime config to dict.""" - 
return { - "tp_rank": self.runtime.tp_rank, - "tp_size": self.runtime.tp_size, - "world_rank": self.runtime.world_rank, - "world_size": self.runtime.world_size, - "device": self.runtime.device, - } - - def _cache_to_dict(self) -> Dict[str, Any]: - """Convert cache config to dict.""" - return { - "block_size": self.cache.block_size, - "num_gpu_blocks": self.cache.num_gpu_blocks, - "cache_dtype": self.cache.cache_dtype, - } + type_hints = get_type_hints(type(section_obj)) + for dc_field in fields(section_obj): + if dc_field.name.startswith("_"): + continue + + annotation = type_hints.get(dc_field.name, dc_field.type) + option = f"--{section_name}.{dc_field.name}" + dest = f"{section_name}__{dc_field.name}" + default_value = getattr(section_obj, dc_field.name) + if _is_bool_annotation(annotation): + section_group.add_argument( + option, + dest=dest, + nargs="?", + const=True, + type=_parse_bool, + default=argparse.SUPPRESS, + help=( + f"{section_name}.{dc_field.name} (bool, default: " + f"{_format_default_for_help(default_value)}). " + "Can be provided as a flag for true or with an explicit value." + ), + ) + continue -class _TempGlobalConfig: - """Context manager for temporary global config changes. - - Supports nested keys like "runtime.tp_size" to modify sub-configs. + converter = _converter_for_annotation(annotation) + if converter is None: + # Skip non-scalar or runtime-only fields (e.g. arbitrary objects). + continue + + section_group.add_argument( + option, + dest=dest, + type=converter, + default=argparse.SUPPRESS, + help=( + f"{section_name}.{dc_field.name} (default: " + f"{_format_default_for_help(default_value)})." + ), + ) + + return parser + + +def read_args( + argv: Optional[Sequence[str]] = None, + parser: Optional[argparse.ArgumentParser] = None, +) -> GlobalConfig: + """Parse CLI args and apply overrides to the singleton ``GlobalConfig``. + + Parameters + ---------- + argv + Optional argument vector. 
If ``None``, ``argparse`` reads from + ``sys.argv`` (standard CLI behavior). + parser + Optional parser to use. When omitted, this function builds one through + :func:`make_args`. + + Returns + ------- + GlobalConfig + The singleton config instance after CLI overrides have been applied. + + Behavior + -------- + 1. Parse all generated ``--section.field`` options. + 2. Apply only explicitly provided options (no accidental overwrite by parser + defaults). + 3. Rebuild ``ServerConfig`` when server fields change so validation in + ``ServerConfig.__post_init__`` and ``_validate`` remains enforced. + 4. Keep ``server.model_path`` and ``model.model_path`` aligned when only one + side is explicitly overridden (the same precedence used by runtime config + loading conventions). """ - - def __init__(self, config: GlobalConfig, **kwargs): - self.config = config - self.temp_values = kwargs - self.old_values = {} - - def _get_nested_attr(self, key: str): - """Get attribute, supporting dot notation for nested access.""" - if "." in key: - parts = key.split(".") - obj = self.config - for part in parts[:-1]: - obj = getattr(obj, part) - return getattr(obj, parts[-1]) - return getattr(self.config, key) - - def _set_nested_attr(self, key: str, value): - """Set attribute, supporting dot notation for nested access.""" - if "." 
in key: - parts = key.split(".") - obj = self.config - for part in parts[:-1]: - obj = getattr(obj, part) - setattr(obj, parts[-1], value) - else: - setattr(self.config, key, value) - - def __enter__(self): - for key, value in self.temp_values.items(): - self.old_values[key] = self._get_nested_attr(key) - self._set_nested_attr(key, value) - return self.config - - def __exit__(self, exc_type, exc_val, exc_tb): - for key, value in self.old_values.items(): - self._set_nested_attr(key, value) - return False + + if parser is None: + parser = make_args() + + namespace = parser.parse_args(argv) + parsed = vars(namespace) + cfg = GlobalConfig.get_instance() + + # Server: reconstruct to preserve validation behavior. + from pymllm.configs.server_config import ServerConfig + + server_updates: dict[str, Any] = {} + for dc_field in fields(cfg.server): + key = f"server__{dc_field.name}" + if key in parsed: + server_updates[dc_field.name] = parsed[key] + if server_updates: + server_values = { + dc_field.name: getattr(cfg.server, dc_field.name) + for dc_field in fields(cfg.server) + } + server_values.update(server_updates) + cfg.server = ServerConfig(**server_values) + + # Model / Quantization: in-place updates are sufficient. + for section_name, section_obj in ( + ("model", cfg.model), + ("quantization", cfg.quantization), + ): + for dc_field in fields(section_obj): + key = f"{section_name}__{dc_field.name}" + if key in parsed: + setattr(section_obj, dc_field.name, parsed[key]) + + # Keep model path synchronized when only one side is explicitly overridden. 
+ server_model_overridden = "server__model_path" in parsed + model_model_overridden = "model__model_path" in parsed + if server_model_overridden and not model_model_overridden: + cfg.model.model_path = cfg.server.model_path + elif model_model_overridden and not server_model_overridden: + cfg.server.model_path = cfg.model.model_path + + cfg._initialized = True + return cfg -# Convenience function def get_global_config() -> GlobalConfig: - """Get the global config singleton instance.""" + """Return the global config singleton.""" return GlobalConfig.get_instance() diff --git a/pymllm/configs/model_config.py b/pymllm/configs/model_config.py index e69de29b..c23dff1d 100644 --- a/pymllm/configs/model_config.py +++ b/pymllm/configs/model_config.py @@ -0,0 +1,31 @@ +"""Lightweight model configuration: path + HuggingFace config handle.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Optional + + +@dataclass +class ModelConfig: + """Minimal model config wrapping a HuggingFace PretrainedConfig. 
+ + Attributes on ``hf_config`` are flattened onto this object:: + + cfg = get_global_config().model + cfg.hidden_size # -> hf_config.hidden_size + cfg.vocab_size # -> hf_config.vocab_size + cfg.text_config # -> hf_config.text_config (multimodal) + """ + + # Populated at runtime via ``transformers.AutoConfig.from_pretrained`` + hf_config: Optional[Any] = field(default=None, repr=False) + + def __getattr__(self, name: str) -> Any: + hf = object.__getattribute__(self, "hf_config") + if hf is not None and hasattr(hf, name): + return getattr(hf, name) + raise AttributeError( + f"'{type(self).__name__}' has no attribute '{name}' " + f"(also not found on hf_config)" + ) diff --git a/pymllm/configs/quantization_config.py b/pymllm/configs/quantization_config.py index e69de29b..850ea82b 100644 --- a/pymllm/configs/quantization_config.py +++ b/pymllm/configs/quantization_config.py @@ -0,0 +1,18 @@ +"""Quantization settings for model weights and KV cache.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal, Optional + + +@dataclass +class QuantizationConfig: + """Quantization configuration for weights and KV cache.""" + + # Weight quantization method (e.g. "awq", "gptq", "fp8", None for no quant) + method: Optional[str] = None + # KV cache data type override + kv_cache_dtype: Literal[ + "auto", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2" + ] = "auto" diff --git a/pymllm/configs/server_config.py b/pymllm/configs/server_config.py index 56be4fc4..7cda9c3b 100644 --- a/pymllm/configs/server_config.py +++ b/pymllm/configs/server_config.py @@ -2,266 +2,118 @@ from pathlib import Path from typing import Any, Literal, Optional -from dataclasses import asdict, dataclass, field +from dataclasses import dataclass, field @dataclass class ServerConfig: - """ - Centralized runtime configuration for the MLLM server. 
+ """Centralized runtime configuration for the MLLM server.""" - The fields are grouped by operational concern so that: - - CLI args can map directly to this dataclass. - - YAML/JSON config files can be loaded and validated in one place. - - future extensions can follow a predictable structure. - """ - - # ------------------------------------------------------------------------- - # Model and tokenizer settings - # ------------------------------------------------------------------------- - # Required path to the model checkpoint directory or model identifier. - model_path: Path - # Optional tokenizer path; when omitted we fall back to `model_path`. + # --------------------------------------------------------------------- # + # Model and tokenizer configuration + # --------------------------------------------------------------------- # + model_path: Optional[Path] = None tokenizer_path: Optional[Path] = None - # Tokenizer bootstrap strategy: - # - "auto": infer tokenizer mode from model type. - # - "slow"/"fast": force a specific tokenizer implementation. tokenizer_mode: Literal["auto", "slow", "fast"] = "auto" - # Number of worker threads/processes used by tokenizer service. - tokenizer_worker_num: int = 1 - # Skip tokenizer initialization at startup to reduce cold-start latency. - skip_tokenizer_init: bool = False - # Model loading format hint for loader backends. - load_format: Literal["auto", "pt", "safetensors", "gguf"] = "auto" - # Allow loading custom model code from remote repositories. + load_format: Literal["auto", "safetensors"] = "auto" trust_remote_code: bool = False - # Explicit context length; `None` means infer from model config. + download_dir: Optional[Path] = None context_length: Optional[int] = None - # Model precision policy for weights and activations. dtype: Literal["auto", "float16", "bfloat16", "float32"] = "auto" - # Quantization algorithm to apply at load time. 
- quantization: Optional[str] = None - # KV cache dtype; can differ from model dtype for better memory trade-offs. - kv_cache_dtype: Literal["auto", "float16", "bfloat16", "fp8_e4m3", "fp8_e5m2"] = ( - "auto" - ) - # HuggingFace revision/commit/tag for deterministic model resolution. - revision: Optional[str] = None - # Optional custom directory used to cache downloaded model artifacts. - download_dir: Optional[Path] = None - # ------------------------------------------------------------------------- - # HTTP / API server settings - # ------------------------------------------------------------------------- - # Host address the HTTP server binds to. + # --------------------------------------------------------------------- # + # HTTP / API server + # --------------------------------------------------------------------- # host: str = "127.0.0.1" - # TCP port exposed by the HTTP server. port: int = 30000 - # Optional FastAPI root path when running behind a reverse proxy. fastapi_root_path: str = "" - # API key required by client-facing endpoints. api_key: Optional[str] = None - # Admin API key for privileged management endpoints. admin_api_key: Optional[str] = None - # Public model name returned in OpenAI-compatible API responses. served_model_name: Optional[str] = None - # Path used for server-side file uploads or temporary user artifacts. file_storage_path: Path = Path("mllm_storage") - # ------------------------------------------------------------------------- - # Runtime and scheduling behavior - # ------------------------------------------------------------------------- - # Fraction of total GPU memory reserved for static allocations - # (primarily model weights + KV cache). + # --------------------------------------------------------------------- # + # Scheduling and memory + # --------------------------------------------------------------------- # mem_fraction_static: Optional[float] = None - # Maximum number of requests concurrently executing in scheduler. 
- max_running_requests: Optional[int] = None - # Maximum queued requests waiting for execution. + max_running_requests: Optional[int] = 1 max_queued_requests: Optional[int] = None - # Hard cap of total active tokens across all in-flight requests. max_total_tokens: Optional[int] = None - # Prefill chunk size used to trade throughput vs memory pressure. chunked_prefill_size: Optional[int] = None - # Upper bound for tokens accepted in a single prefill pass. - max_prefill_tokens: int = 16384 - # Scheduling policy: - # - "fcfs": first-come-first-served fairness. - # - "lpm": longest-prefix-match style cache locality optimization. - schedule_policy: Literal["fcfs", "lpm"] = "fcfs" - # Conservative multiplier for scheduler admission decisions. - # Values > 1.0 are safer for OOM avoidance but may reduce utilization. + max_prefill_tokens: Optional[int] = None + schedule_policy: Literal["auto", "fcfs"] = "fcfs" schedule_conservativeness: float = 1.0 - # Enable low-power sleep while idle to reduce background GPU usage. sleep_on_idle: bool = False - # Stream partial output every N decode steps when streaming is enabled. stream_interval: int = 1 - # Enable token streaming in generation responses. stream_output: bool = True - # ------------------------------------------------------------------------- - # Parallelism and distributed deployment - # ------------------------------------------------------------------------- - # Tensor parallel size (intra-layer sharding). - tp_size: int = 1 - # Data parallel size (replicated model workers). - dp_size: int = 1 - # Expert parallel size for MoE-style models. - ep_size: int = 1 - # Pipeline parallel size (inter-layer partitioning). - pp_size: int = 1 - # Number of nodes participating in distributed serving. - nnodes: int = 1 - # Rank of current node in multi-node topology. - node_rank: int = 0 - # Torch distributed init address, e.g. "host:port". - dist_init_addr: Optional[str] = None - # Optional NCCL communication port override. 
- nccl_port: Optional[int] = None - # Timeout in seconds for distributed collectives. - dist_timeout: Optional[int] = None - # Base GPU index used for process-to-device mapping. + # --------------------------------------------------------------------- # + # Threads + # --------------------------------------------------------------------- # + enable_disk_io_async: bool = False + disk_io_async_thread_count: int = 1 + + # --------------------------------------------------------------------- # + # Device + # --------------------------------------------------------------------- # base_gpu_id: int = 0 - # Step size between logical workers when assigning GPU IDs. - gpu_id_step: int = 1 - # ------------------------------------------------------------------------- - # Backend and acceleration toggles - # ------------------------------------------------------------------------- - # Attention kernel backend selection. - attention_backend: Optional[str] = None - # Sampling backend selection. + # --------------------------------------------------------------------- # + # Backend / acceleration + # --------------------------------------------------------------------- # + attention_backend: Literal["auto", "flashinfer"] = "auto" sampling_backend: Optional[str] = None - # Grammar-constrained decoding backend. - grammar_backend: Optional[str] = None - # Disable CUDA graph capture for debugging/compatibility. disable_cuda_graph: bool = False - # Enable `torch.compile` acceleration path. - enable_torch_compile: bool = False - # Maximum batch size considered by `torch.compile` profiles. + enable_torch_compile: bool = True torch_compile_max_bs: int = 32 - # Enable deterministic inference behavior where possible. - enable_deterministic_inference: bool = False - # Random seed for reproducible sampling and initialization. 
- random_seed: Optional[int] = None + random_seed: Optional[int] = 42 - # ------------------------------------------------------------------------- - # Logging, metrics, and observability - # ------------------------------------------------------------------------- - # Global log level for server components. + # --------------------------------------------------------------------- # + # Logging and observability + # --------------------------------------------------------------------- # log_level: Literal["debug", "info", "warning", "error", "critical"] = "info" - # HTTP access log level; if None, inherits global log level. - log_level_http: Optional[str] = None - # Log each request payload/metadata for debugging. - log_requests: bool = False - # Verbosity level for request logging, larger means more detail. - log_requests_level: int = 2 - # Toggle built-in Prometheus/metrics endpoint. enable_metrics: bool = False - # Include latency/time-cost summaries in logs. show_time_cost: bool = False - # Optional OpenTelemetry traces endpoint ("host:port"). - otlp_traces_endpoint: str = "localhost:4317" - # Enable tracing export to OTLP collector. - enable_trace: bool = False - - # ------------------------------------------------------------------------- - # Feature switches and advanced decoding options - # ------------------------------------------------------------------------- - # Enable LoRA adapter serving support. - enable_lora: bool = False - # Maximum number of LoRA adapters loaded simultaneously. - max_loaded_loras: Optional[int] = None - # Maximum LoRA adapters that can be mixed in one batch. - max_loras_per_batch: int = 8 - # LoRA backend implementation. - lora_backend: Literal["triton", "csgmv", "torch_native"] = "csgmv" - # Enable multimodal processing pipeline. - enable_multimodal: bool = False - # Max concurrent multimodal tool calls. - mm_max_concurrent_calls: int = 32 - # Timeout (seconds) for each multimodal call. 
- mm_per_request_timeout: float = 10.0 - # Speculative decoding algorithm name (e.g. "eagle", "ngram"). - speculative_algorithm: Optional[str] = None - # Draft model path used in speculative decoding. - speculative_draft_model_path: Optional[Path] = None - # Number of speculative steps per target decode iteration. - speculative_num_steps: Optional[int] = None - # Number of proposed draft tokens per speculation step. - speculative_num_draft_tokens: Optional[int] = None - # ------------------------------------------------------------------------- - # Internal bookkeeping (not usually set by users directly) - # ------------------------------------------------------------------------- - # Additional arbitrary key-value options for forward compatibility. + # --------------------------------------------------------------------- # + # Feature switches + # --------------------------------------------------------------------- # + # enable_lora: bool = False + # max_loaded_loras: Optional[int] = None + # max_loras_per_batch: int = 8 + # lora_backend: Literal["triton", "csgmv", "torch_native"] = "csgmv" + # enable_multimodal: bool = False + # speculative_algorithm: Optional[str] = None + # speculative_draft_model_path: Optional[Path] = None + # speculative_num_steps: Optional[int] = None + # speculative_num_draft_tokens: Optional[int] = None + + # --------------------------------------------------------------------- # + # Extra + # --------------------------------------------------------------------- # extra_options: dict[str, Any] = field(default_factory=dict) def __post_init__(self) -> None: - """Normalize defaults and validate constraints after dataclass initialization.""" if self.tokenizer_path is None: self.tokenizer_path = self.model_path if self.served_model_name is None: self.served_model_name = str(self.model_path) + self._validate() - self._validate_basic_constraints() - self._validate_parallelism_constraints() - self._validate_scheduler_constraints() - - def 
_validate_basic_constraints(self) -> None: - """Validate scalar ranges and common invariants.""" + def _validate(self) -> None: if self.port <= 0 or self.port > 65535: raise ValueError("`port` must be in range [1, 65535].") - if self.max_prefill_tokens <= 0: - raise ValueError("`max_prefill_tokens` must be greater than 0.") + if self.max_prefill_tokens is not None and self.max_prefill_tokens <= 0: + raise ValueError("`max_prefill_tokens` must be > 0.") if self.stream_interval <= 0: - raise ValueError("`stream_interval` must be greater than 0.") + raise ValueError("`stream_interval` must be > 0.") if self.mem_fraction_static is not None and not ( 0.0 < self.mem_fraction_static < 1.0 ): - raise ValueError("`mem_fraction_static` must be in range (0.0, 1.0).") - - def _validate_parallelism_constraints(self) -> None: - """Validate distributed and parallel topology settings.""" - for key, value in { - "tp_size": self.tp_size, - "dp_size": self.dp_size, - "ep_size": self.ep_size, - "pp_size": self.pp_size, - "nnodes": self.nnodes, - }.items(): - if value <= 0: - raise ValueError(f"`{key}` must be greater than 0.") - - if self.node_rank < 0 or self.node_rank >= self.nnodes: - raise ValueError("`node_rank` must satisfy 0 <= node_rank < nnodes.") - - def _validate_scheduler_constraints(self) -> None: - """Validate scheduler-related soft limits.""" + raise ValueError("`mem_fraction_static` must be in (0.0, 1.0).") if self.max_running_requests is not None and self.max_running_requests <= 0: - raise ValueError("`max_running_requests` must be greater than 0 when set.") + raise ValueError("`max_running_requests` must be > 0 when set.") if self.max_queued_requests is not None and self.max_queued_requests < 0: raise ValueError("`max_queued_requests` must be >= 0 when set.") - if self.max_total_tokens is not None and self.max_total_tokens <= 0: - raise ValueError("`max_total_tokens` must be greater than 0 when set.") - if self.chunked_prefill_size is not None and 
self.chunked_prefill_size <= 0: - raise ValueError("`chunked_prefill_size` must be greater than 0 when set.") if self.schedule_conservativeness <= 0: - raise ValueError("`schedule_conservativeness` must be greater than 0.") - - def to_dict(self) -> dict[str, Any]: - """ - Serialize config to a plain dictionary. - - Path values are converted to string for easier JSON/YAML serialization. - """ - data = asdict(self) - for key in [ - "model_path", - "tokenizer_path", - "download_dir", - "file_storage_path", - "speculative_draft_model_path", - ]: - if data.get(key) is not None: - data[key] = str(data[key]) - return data + raise ValueError("`schedule_conservativeness` must be > 0.") diff --git a/pymllm/engine/launch.py b/pymllm/engine/launch.py index 7ce1be5e..25ada7c7 100644 --- a/pymllm/engine/launch.py +++ b/pymllm/engine/launch.py @@ -1 +1,115 @@ -import multiprocessing as mp +import logging +from pathlib import Path +from typing import Optional + +import zmq +import torch +import torch.multiprocessing as mp +from transformers import AutoConfig +from huggingface_hub import snapshot_download +from pymllm.configs import get_global_config +from pymllm.orchestrator.tokenizer_process import TokenizerProcess +from pymllm.orchestrator.detokenizer_process import DetokenizerProcess +from pymllm.orchestrator.model_runner_process import ModelRunnerProcess +from pymllm.orchestrator.async_disk_io_process import AsyncDiskIoProcess +from pymllm.orchestrator.request_response_process import RequestResponseProcess + +logger = logging.getLogger(__name__) + + +class Engine: + def __init__(self): + self._config_logging() + self._set_default_torch_dtype() + self._check_model_and_tokenizer() + + # Orchestrator, shall we start the music here? 
+ self._launch_processes() + + def _launch_processes(self): + """ + TODO issue processes here + """ + + # RR process is the main process + self._rr_process = RequestResponseProcess() + + def _set_default_torch_dtype(self): + """Set the default torch dtype based on the server configuration.""" + dtype = get_global_config().server.dtype + if dtype == "auto": + dtype = "bfloat16" if torch.cuda.is_available() else "float32" + dtype_map = { + "float16": torch.float16, + "bfloat16": torch.bfloat16, + "float32": torch.float32, + } + torch_dtype = dtype_map.get(dtype) + if torch_dtype is None: + raise ValueError(f"Unsupported dtype for torch default dtype: {dtype!r}") + torch.set_default_dtype(torch_dtype) + + def _config_logging(self): + """Configure logging level from server configuration.""" + level_name = get_global_config().server.log_level.upper() + level = getattr(logging, level_name, logging.INFO) + root_logger = logging.getLogger() + if not root_logger.handlers: + logging.basicConfig( + level=level, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + else: + root_logger.setLevel(level) + logging.getLogger("pymllm").setLevel(level) + + def _check_model_and_tokenizer(self): + cfg = get_global_config() + if cfg.server.model_path is None or cfg.server.tokenizer_path is None: + logger.error("Model path or tokenizer path is not set") + raise ValueError("Model path or tokenizer path is not set") + model_path = cfg.server.model_path + tokenizer_path = cfg.server.tokenizer_path + download_dir = cfg.server.download_dir + trust_remote_code = cfg.server.trust_remote_code + + shared_path = model_path == tokenizer_path + + model_path = self._maybe_download(model_path, download_dir) + cfg.server.model_path = model_path + + if shared_path: + cfg.server.tokenizer_path = model_path + else: + cfg.server.tokenizer_path = self._maybe_download( + tokenizer_path, download_dir + ) + + cfg.model.hf_config = AutoConfig.from_pretrained( + str(model_path), + 
trust_remote_code=trust_remote_code, + ) + logger.info("Loaded model config: %s", cfg.model.hf_config.__class__.__name__) + + @staticmethod + def _maybe_download(path: Path, download_dir: Optional[Path] = None) -> Path: + """Return a local directory for *path*, downloading if necessary.""" + if path.is_dir(): + return path + + repo_id = str(path) + logger.info("Downloading '%s' ...", repo_id) + + kwargs = {} + if download_dir is not None: + kwargs["local_dir"] = str(download_dir / path.name) + + downloaded = snapshot_download(repo_id=repo_id, **kwargs) + logger.info("Downloaded '%s' to '%s'", repo_id, downloaded) + return Path(downloaded) + + def generate(self, stream: bool = True): + pass + + async def generate_async(self, stream: bool = True): + pass diff --git a/pymllm/orchestrator/scheduler.py b/pymllm/executor/eager_runner.py similarity index 100% rename from pymllm/orchestrator/scheduler.py rename to pymllm/executor/eager_runner.py diff --git a/pymllm/orchestrator/async_disk_io_process.py b/pymllm/orchestrator/async_disk_io_process.py new file mode 100644 index 00000000..598d93eb --- /dev/null +++ b/pymllm/orchestrator/async_disk_io_process.py @@ -0,0 +1,3 @@ +class AsyncDiskIoProcess: + def __init__(self): + pass diff --git a/pymllm/orchestrator/detokenizer_process.py b/pymllm/orchestrator/detokenizer_process.py new file mode 100644 index 00000000..47c1c595 --- /dev/null +++ b/pymllm/orchestrator/detokenizer_process.py @@ -0,0 +1,3 @@ +class DetokenizerProcess: + def __init__(self): + pass diff --git a/pymllm/orchestrator/model_runner_process.py b/pymllm/orchestrator/model_runner_process.py new file mode 100644 index 00000000..45091b59 --- /dev/null +++ b/pymllm/orchestrator/model_runner_process.py @@ -0,0 +1,3 @@ +class ModelRunnerProcess: + def __init__(self): + pass diff --git a/pymllm/orchestrator/parallel_state.py b/pymllm/orchestrator/parallel_state.py index 545c74a8..9fb20876 100644 --- a/pymllm/orchestrator/parallel_state.py +++ 
b/pymllm/orchestrator/parallel_state.py @@ -1,21 +1,31 @@ -"""Parallel state management for tensor and pipeline parallelism.""" +"""Minimal parallel state for single-GPU serving. + +pymllm targets single-GPU, high-concurrency inference. This module keeps +the TP / DP / PP scaffolding so the rest of the codebase can query ranks +and groups uniformly, but the default (and expected) case is world_size=1. +""" import logging +from typing import Optional + import torch import torch.distributed as dist -from typing import Optional -from pymllm.configs.global_config import get_global_config from pymllm.orchestrator.group_coordinator import GroupCoordinator logger = logging.getLogger(__name__) - -# Global groups _TP_GROUP: Optional[GroupCoordinator] = None _DP_GROUP: Optional[GroupCoordinator] = None _PP_GROUP: Optional[GroupCoordinator] = None +_TP_RANK: int = 0 +_TP_SIZE: int = 1 +_DP_RANK: int = 0 +_DP_SIZE: int = 1 +_PP_RANK: int = 0 +_PP_SIZE: int = 1 + def initialize_model_parallel( tensor_model_parallel_size: int = 1, @@ -23,15 +33,12 @@ def initialize_model_parallel( pipeline_model_parallel_size: int = 1, backend: str = "nccl", ) -> None: - """Initialize model parallel groups. 
- - Args: - tensor_model_parallel_size: Number of GPUs for tensor parallelism - data_parallel_size: Number of GPUs for data parallelism - pipeline_model_parallel_size: Number of stages for pipeline parallelism - backend: Communication backend (nccl for GPU, gloo for CPU) - """ global _TP_GROUP, _DP_GROUP, _PP_GROUP + global _TP_RANK, _TP_SIZE, _DP_RANK, _DP_SIZE, _PP_RANK, _PP_SIZE + + _TP_SIZE = tensor_model_parallel_size + _DP_SIZE = data_parallel_size + _PP_SIZE = pipeline_model_parallel_size if not dist.is_initialized(): return @@ -40,29 +47,6 @@ def initialize_model_parallel( world_rank = dist.get_rank() local_rank = int(torch.cuda.current_device()) if torch.cuda.is_available() else 0 - config = get_global_config() - - # Update runtime config - config.runtime.world_size = world_size - config.runtime.world_rank = world_rank - config.runtime.local_rank = local_rank - config.runtime.tp_size = tensor_model_parallel_size - config.runtime.dp_size = data_parallel_size - config.runtime.pp_size = pipeline_model_parallel_size - - # Logging - logger.info( - "Model parallel runtime config set: world_size=%s, world_rank=%s, " - "local_rank=%s, tp_size=%s, dp_size=%s, pp_size=%s", - config.runtime.world_size, - config.runtime.world_rank, - config.runtime.local_rank, - config.runtime.tp_size, - config.runtime.dp_size, - config.runtime.pp_size, - ) - - # Validate parallelism setup assert ( tensor_model_parallel_size * data_parallel_size * pipeline_model_parallel_size == world_size @@ -71,13 +55,22 @@ def initialize_model_parallel( f"PP({pipeline_model_parallel_size}) != World({world_size})" ) - # Create TP groups (intra-layer sharding) + logger.info( + "Parallel init: world=%d rank=%d tp=%d dp=%d pp=%d", + world_size, + world_rank, + tensor_model_parallel_size, + data_parallel_size, + pipeline_model_parallel_size, + ) + if tensor_model_parallel_size > 1: num_tp_groups = world_size // tensor_model_parallel_size for i in range(num_tp_groups): ranks = list( range( - i * 
tensor_model_parallel_size, (i + 1) * tensor_model_parallel_size + i * tensor_model_parallel_size, + (i + 1) * tensor_model_parallel_size, ) ) if world_rank in ranks: @@ -86,13 +79,9 @@ def initialize_model_parallel( local_rank=local_rank, backend=backend, ) - config.runtime.tp_rank = _TP_GROUP.rank_in_group + _TP_RANK = _TP_GROUP.rank_in_group break - else: - _TP_GROUP = None - config.runtime.tp_rank = 0 - # Create DP groups (data replication) if data_parallel_size > 1: num_dp_groups = world_size // data_parallel_size for i in range(num_dp_groups): @@ -103,13 +92,9 @@ def initialize_model_parallel( local_rank=local_rank, backend=backend, ) - config.runtime.dp_rank = _DP_GROUP.rank_in_group + _DP_RANK = _DP_GROUP.rank_in_group break - else: - _DP_GROUP = None - config.runtime.dp_rank = 0 - # Create PP groups (inter-layer partitioning) if pipeline_model_parallel_size > 1: num_pp_groups = world_size // pipeline_model_parallel_size for i in range(num_pp_groups): @@ -121,67 +106,60 @@ def initialize_model_parallel( local_rank=local_rank, backend=backend, ) - config.runtime.pp_rank = _PP_GROUP.rank_in_group + _PP_RANK = _PP_GROUP.rank_in_group break - else: - _PP_GROUP = None - config.runtime.pp_rank = 0 + + +# ---- group accessors ------------------------------------------------------ def get_tp_group() -> Optional[GroupCoordinator]: - """Get the tensor model parallel group.""" return _TP_GROUP def get_dp_group() -> Optional[GroupCoordinator]: - """Get the data parallel group.""" return _DP_GROUP def get_pp_group() -> Optional[GroupCoordinator]: - """Get the pipeline parallel group.""" return _PP_GROUP -# Convenience functions for tensor parallelism +# ---- rank / size helpers -------------------------------------------------- + + def get_tensor_model_parallel_rank() -> int: - """Get current tensor model parallel rank.""" - return get_global_config().runtime.tp_rank + return _TP_RANK def get_tensor_model_parallel_world_size() -> int: - """Get tensor model parallel 
world size.""" - return get_global_config().runtime.tp_size + return _TP_SIZE def get_data_parallel_rank() -> int: - """Get current data parallel rank.""" - return get_global_config().runtime.dp_rank + return _DP_RANK def get_data_parallel_world_size() -> int: - """Get data parallel world size.""" - return get_global_config().runtime.dp_size + return _DP_SIZE def get_pipeline_model_parallel_rank() -> int: - """Get current pipeline parallel rank.""" - return get_global_config().runtime.pp_rank + return _PP_RANK def get_pipeline_model_parallel_world_size() -> int: - """Get pipeline parallel world size.""" - return get_global_config().runtime.pp_size + return _PP_SIZE def model_parallel_is_initialized() -> bool: - """Check if model parallel is initialized.""" return _TP_GROUP is not None or _DP_GROUP is not None or _PP_GROUP is not None -# Communication helpers +# ---- communication helpers ------------------------------------------------ + + def tensor_model_parallel_all_reduce(tensor: torch.Tensor) -> torch.Tensor: - """All-reduce across TP group.""" group = get_tp_group() if group is None: return tensor @@ -192,7 +170,6 @@ def tensor_model_parallel_all_gather( tensor: torch.Tensor, dim: int = 0, ) -> torch.Tensor: - """All-gather across TP group.""" group = get_tp_group() if group is None: return tensor @@ -200,7 +177,6 @@ def tensor_model_parallel_all_gather( def data_parallel_all_reduce(tensor: torch.Tensor) -> torch.Tensor: - """All-reduce across DP group.""" group = get_dp_group() if group is None: return tensor diff --git a/pymllm/orchestrator/request_response_process.py b/pymllm/orchestrator/request_response_process.py new file mode 100644 index 00000000..998c2655 --- /dev/null +++ b/pymllm/orchestrator/request_response_process.py @@ -0,0 +1,10 @@ +""" +This module contains the request and response threads for the orchestrator. + +NOTE: This RR(request and response) threads can only be used as the main thread of the orchestrator. 
+""" + + +class RequestResponseProcess: + def __init__(self): + pass diff --git a/pymllm/orchestrator/scheduler_process.py b/pymllm/orchestrator/scheduler_process.py new file mode 100644 index 00000000..7a7783d5 --- /dev/null +++ b/pymllm/orchestrator/scheduler_process.py @@ -0,0 +1,3 @@ +class SchedulerProcess: + def __init__(self): + pass diff --git a/pymllm/orchestrator/tokenizer_process.py b/pymllm/orchestrator/tokenizer_process.py new file mode 100644 index 00000000..0dca2155 --- /dev/null +++ b/pymllm/orchestrator/tokenizer_process.py @@ -0,0 +1,3 @@ +class TokenizerProcess: + def __init__(self): + pass diff --git a/pymllm/server/launch.py b/pymllm/server/launch.py index e69de29b..83a222f7 100644 --- a/pymllm/server/launch.py +++ b/pymllm/server/launch.py @@ -0,0 +1,17 @@ +from pymllm.engine.launch import Engine +from pymllm.configs.global_config import make_args, read_args + + +def _prepare_args(): + parser = make_args() + read_args(parser=parser) + + +def main(): + _prepare_args() + engine = Engine() + engine.launch() + + +if __name__ == "__main__": + main() diff --git a/pymllm/tests/test_vocab_parallel_embedding.py b/pymllm/tests/test_vocab_parallel_embedding.py index e22b52a5..44148f98 100644 --- a/pymllm/tests/test_vocab_parallel_embedding.py +++ b/pymllm/tests/test_vocab_parallel_embedding.py @@ -12,10 +12,11 @@ import torch.multiprocessing as mp from typing import Callable -from pymllm.configs import get_global_config from pymllm.layers import VocabParallelEmbedding -from pymllm.orchestrator import ( - initialize_model_parallel, +from pymllm.orchestrator import initialize_model_parallel +from pymllm.orchestrator.parallel_state import ( + get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, ) # Show runtime init logs during test execution. 
@@ -91,10 +92,11 @@ def embedding_forward_tp8_worker_cuda(rank: int, local_rank: int, world_size: in local_rank: Local rank within this node (for logging/debugging) world_size: Total world size """ - config = get_global_config() + tp_size = get_tensor_model_parallel_world_size() + tp_rank = get_tensor_model_parallel_rank() - assert config.runtime.tp_size == 8, f"Rank {rank}: tp_size should be 8" - assert config.runtime.tp_rank == rank, f"Rank {rank}: tp_rank mismatch" + assert tp_size == 8, f"Rank {rank}: tp_size should be 8" + assert tp_rank == rank, f"Rank {rank}: tp_rank mismatch" vocab_size = 1024 embed_dim = 64 @@ -281,12 +283,12 @@ class TestVocabParallelEmbeddingCUDA: @pytest.fixture(autouse=True) def setup_config(self): - config = get_global_config() - config.runtime.tp_size = 1 - config.runtime.tp_rank = 0 + import pymllm.orchestrator.parallel_state as ps + ps._TP_SIZE = 1 + ps._TP_RANK = 0 yield - config.runtime.tp_size = 1 - config.runtime.tp_rank = 0 + ps._TP_SIZE = 1 + ps._TP_RANK = 0 def test_cuda_forward(self): layer = VocabParallelEmbedding(1000, 512).cuda() diff --git a/pyproject.toml b/pyproject.toml index 160341ba..d417b579 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies=[ "packaging", "pytest", "pytest-html", - "apache-tvm-ffi == 0.1.8", + "apache-tvm-ffi == 0.1.8.post2", "pyyaml >= 6.0.2", "openai", "modelscope", @@ -33,12 +33,13 @@ dependencies=[ ] [project.optional-dependencies] -cuda = ["tilelang", "flashinfer-python"] +cuda = ["tilelang", "flashinfer-python", "pyzmq"] [project.scripts] pymllm = "pymllm.__main__:main" mllm-convertor = "pymllm.mobile.utils.mllm_convertor:main" mllm-service = "pymllm.mobile.service.tools:cli_app" +pymllm-server = "pymllm.server.launch:main" [tool.setuptools.exclude-package-data] "*" = ["*.pyc"] From 6c4aa444971841693fa682422882edd9ce789797 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Sat, 21 Feb 2026 15:05:35 +0000 Subject: [PATCH 08/13] feat: implement 
store_cache functionality and related components - Added a new `store_cache` CUDA kernel for efficient key/value tensor storage in a cache. - Introduced Python interface for the `store_cache` kernel, enabling its use in PyTorch. - Created benchmarks to compare `store_cache` performance against standard PyTorch indexing. - Updated `.gitignore` to stop ignoring the `.claude` directory and added `.pytest_cache` to `mllm-kernel`. - Added tests for `store_cache` functionality to ensure correctness and performance. - Refactored memory management in `KVPool` to utilize the new `store_cache` kernel when applicable. --- .claude/skills/update-codeowners/SKILL.md | 44 + .gitignore | 1 - mllm-kernel/.gitignore | 1 + mllm-kernel/benchmarks/bench_store_cache.py | 164 ++++ .../mllm_kernel/cuda/csrc/store_cache.cuh | 202 +++++ mllm-kernel/mllm_kernel/cuda/jit/__init__.py | 3 +- .../mllm_kernel/cuda/jit/store_cache.py | 127 +++ mllm-kernel/tests/test_store_cache.py | 66 ++ pymllm/engine/io_struct.py | 196 +++++ pymllm/engine/launch.py | 308 ++++++- pymllm/mem_cache/__init__.py | 37 + pymllm/mem_cache/memory_pool.py | 480 +++++++++++ pymllm/mem_cache/param_disk_cache.py | 0 pymllm/mem_cache/radix_cache.py | 794 ++++++++++++++++++ pymllm/orchestrator/async_disk_io_process.py | 83 +- pymllm/orchestrator/detokenizer_process.py | 113 ++- pymllm/orchestrator/ipc_utils.py | 70 ++ pymllm/orchestrator/model_runner_process.py | 113 ++- .../orchestrator/request_response_process.py | 148 +++- pymllm/orchestrator/scheduler_process.py | 247 +++++- pymllm/orchestrator/tokenizer_process.py | 101 ++- 21 files changed, 3264 insertions(+), 34 deletions(-) create mode 100644 .claude/skills/update-codeowners/SKILL.md create mode 100644 mllm-kernel/benchmarks/bench_store_cache.py create mode 100644 mllm-kernel/mllm_kernel/cuda/csrc/store_cache.cuh create mode 100644 mllm-kernel/mllm_kernel/cuda/jit/store_cache.py create mode 100644 mllm-kernel/tests/test_store_cache.py create mode 100644
pymllm/engine/io_struct.py create mode 100644 pymllm/mem_cache/memory_pool.py delete mode 100644 pymllm/mem_cache/param_disk_cache.py create mode 100644 pymllm/orchestrator/ipc_utils.py diff --git a/.claude/skills/update-codeowners/SKILL.md b/.claude/skills/update-codeowners/SKILL.md new file mode 100644 index 00000000..28666704 --- /dev/null +++ b/.claude/skills/update-codeowners/SKILL.md @@ -0,0 +1,44 @@ +--- +name: update-codeowners +description: Updates CODEOWNERS entries safely with consistent path and owner formatting. Use when the user asks to add, remove, or modify CODEOWNERS rules, ownership mappings, reviewers, or module maintainers. +--- + +# Update CODEOWNERS + +## Goal +Maintain `CODEOWNERS` accurately while preserving the repository's existing section/comment style. + +## Workflow +1. Read the current `CODEOWNERS` file before editing. +2. Identify requested changes as one of: + - Add new path rule + - Modify owners for existing path rule + - Remove obsolete path rule + - Reorganize section comments (only if requested) +3. Update rules in place instead of creating duplicates for the same path. +4. Keep existing section headers and comment style unless the user asks to refactor structure. +5. Return a concise changelog describing which paths were added, changed, or removed. + +## Rule Format +- Use one rule per line: ` ...` +- Owners must be GitHub handles prefixed with `@`. +- Keep path style consistent with the file (in this repo, path patterns typically start with `/`). +- Do not leave rules with empty owner lists. + +## Editing Guidelines +- Prefer minimal edits near related sections. +- If a path already exists, update that line instead of adding a second conflicting line. +- If a new rule logically belongs to an existing section, place it in that section. +- Preserve human-readable grouping and blank lines. +- Keep comments intact unless they are clearly outdated and the user asked for cleanup. 
+ +## Validation Checklist +- [ ] Every non-comment, non-empty line has at least one owner. +- [ ] Every owner token starts with `@`. +- [ ] No accidental duplicate rule for the exact same path pattern. +- [ ] Existing comments/sections were preserved unless explicitly changed. + +## Example Requests +- "Add `/mllm/models/new_model/ @alice @bob` under models." +- "Change `/core/Storage` owner to `@team-core`." +- "Remove ownership rule for deprecated path `/legacy/`." diff --git a/.gitignore b/.gitignore index 7397d6ec..cdafc270 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,6 @@ .cache/ .tmp/ compile_commands.json -.claude/ # MLLM Team Specific tasks/mllmteam* diff --git a/mllm-kernel/.gitignore b/mllm-kernel/.gitignore index df61d0fa..3eefc8fb 100644 --- a/mllm-kernel/.gitignore +++ b/mllm-kernel/.gitignore @@ -3,3 +3,4 @@ build-py/ .vscode/settings.json compile_commands.json .clangd +.pytest_cache/ diff --git a/mllm-kernel/benchmarks/bench_store_cache.py b/mllm-kernel/benchmarks/bench_store_cache.py new file mode 100644 index 00000000..b96fa608 --- /dev/null +++ b/mllm-kernel/benchmarks/bench_store_cache.py @@ -0,0 +1,164 @@ +"""Benchmark store_cache vs torch index with torch.profiler. 
+ +Example: +python benchmarks/bench_store_cache.py --warmup 20 --iters 200 --batch-size 512 --num-slots 8192 +""" + +import argparse + +import torch +from torch.profiler import ProfilerActivity, profile + +from mllm_kernel.cuda.jit import can_use_store_cache, store_cache + + +def _run_store_cache_once( + k: torch.Tensor, + v: torch.Tensor, + k_cache: torch.Tensor, + v_cache: torch.Tensor, + indices: torch.Tensor, +): + store_cache(k, v, k_cache, v_cache, indices) + + +def _run_torch_index_once( + k: torch.Tensor, + v: torch.Tensor, + k_cache: torch.Tensor, + v_cache: torch.Tensor, + indices: torch.Tensor, +): + k_cache[indices] = k + v_cache[indices] = v + + +def _profile_path( + name: str, + fn, + *, + warmup: int, + iters: int, + row_limit: int, + trace_path: str | None, +): + for _ in range(warmup): + fn() + torch.cuda.synchronize() + + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=False, + profile_memory=False, + with_stack=False, + ) as prof: + for _ in range(iters): + fn() + torch.cuda.synchronize() + + events = prof.key_averages() + # torch profiler times are in microseconds. + # PyTorch versions vary between *cuda* and *device* naming. 
+ time_attr = ( + "self_cuda_time_total" + if events and hasattr(events[0], "self_cuda_time_total") + else "self_device_time_total" + ) + sort_key = ( + "self_cuda_time_total" + if time_attr == "self_cuda_time_total" + else "self_device_time_total" + ) + total_self_device_us = sum(float(getattr(evt, time_attr, 0.0)) for evt in events) + avg_self_device_us = total_self_device_us / max(iters, 1) + + print(f"\n=== {name} ===") + print( + prof.key_averages().table( + sort_by=sort_key, + row_limit=row_limit, + ) + ) + print(f"{name} total self device time: {total_self_device_us:.2f} us") + print(f"{name} avg self device time/iter: {avg_self_device_us:.2f} us") + + if trace_path: + prof.export_chrome_trace(trace_path) + print(f"{name} trace exported: {trace_path}") + + return avg_self_device_us + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Benchmark store_cache vs torch index using torch.profiler" + ) + parser.add_argument("--batch-size", type=int, default=1024) + parser.add_argument("--num-slots", type=int, default=16384) + parser.add_argument("--head-num", type=int, default=8) + parser.add_argument("--head-dim", type=int, default=128) + parser.add_argument( + "--dtype", + type=str, + default="float16", + choices=["float16", "bfloat16", "float32"], + ) + parser.add_argument("--warmup", type=int, default=50) + parser.add_argument("--iters", type=int, default=200) + parser.add_argument("--row-limit", type=int, default=20) + parser.add_argument("--export-trace-dir", type=str, default="") + parser.add_argument("--seed", type=int, default=0) + args = parser.parse_args() + + if not torch.cuda.is_available(): + raise RuntimeError("CUDA is required for this benchmark") + + torch.manual_seed(args.seed) + device = torch.device("cuda") + dtype = getattr(torch, args.dtype) + + row_dim = args.head_num * args.head_dim + row_bytes = row_dim * torch.tensor([], dtype=dtype).element_size() + if not can_use_store_cache(row_bytes): + raise 
RuntimeError(f"store_cache is unavailable for row_bytes={row_bytes}") + + k = torch.randn(args.batch_size, row_dim, device=device, dtype=dtype) + v = torch.randn(args.batch_size, row_dim, device=device, dtype=dtype) + # Use unique indices to avoid write conflicts. + indices = torch.randperm(args.num_slots, device=device)[: args.batch_size].to( + torch.int64 + ) + k_cache = torch.zeros(args.num_slots, row_dim, device=device, dtype=dtype) + v_cache = torch.zeros_like(k_cache) + print("=== store_cache profiler benchmark ===") + print( + f"shape: batch={args.batch_size}, row_dim={row_dim}, slots={args.num_slots}, dtype={dtype}" + ) + print(f"warmup={args.warmup}, iters={args.iters}, row_limit={args.row_limit}") + + trace_dir = args.export_trace_dir.strip() + store_trace = f"{trace_dir}/store_cache_trace.json" if trace_dir else None + torch_trace = f"{trace_dir}/torch_index_trace.json" if trace_dir else None + + store_avg_us = _profile_path( + "store_cache", + lambda: _run_store_cache_once(k, v, k_cache, v_cache, indices), + warmup=args.warmup, + iters=args.iters, + row_limit=args.row_limit, + trace_path=store_trace, + ) + torch_avg_us = _profile_path( + "torch_index", + lambda: _run_torch_index_once(k, v, k_cache, v_cache, indices), + warmup=args.warmup, + iters=args.iters, + row_limit=args.row_limit, + trace_path=torch_trace, + ) + speedup = torch_avg_us / max(store_avg_us, 1e-12) + print(f"\nSpeedup: {speedup:.3f}x") + + +if __name__ == "__main__": + main() diff --git a/mllm-kernel/mllm_kernel/cuda/csrc/store_cache.cuh b/mllm-kernel/mllm_kernel/cuda/csrc/store_cache.cuh new file mode 100644 index 00000000..05daabee --- /dev/null +++ b/mllm-kernel/mllm_kernel/cuda/csrc/store_cache.cuh @@ -0,0 +1,202 @@ +// Copyright SGLang Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Store KV cache kernel: efficiently scatter key/value tensors into a +// pre-allocated KV cache pool using warp-level vectorized copies. +// +// Reference: sglang jit_kernel/csrc/elementwise/kvcache.cuh + +#pragma once + +#include +#include +#include + +#include +#include + +#include + +namespace { + +// ─────────────────────────────────────────────────────────────── +// Parameter block passed to the kernel via __grid_constant__ +// ─────────────────────────────────────────────────────────────── + +struct StoreKVCacheParams { + const void* __restrict__ k; + const void* __restrict__ v; + void* __restrict__ k_cache; + void* __restrict__ v_cache; + const void* __restrict__ indices; + int64_t stride_k_bytes; + int64_t stride_v_bytes; + int64_t stride_cache_bytes; + int64_t stride_indices; + uint32_t batch_size; +}; + +constexpr uint32_t kNumWarps = 4; +constexpr uint32_t kThreadsPerBlock = kNumWarps * device::kWarpThreads; + +// ─────────────────────────────────────────────────────────────── +// Vectorized warp-level KV copy +// ─────────────────────────────────────────────────────────────── +// +// Each warp copies kElementBytes of K data and kElementBytes of V +// data using the widest possible aligned vector type (uint4 = 16B, +// uint2 = 8B, or uint32_t = 4B). 
+ +namespace detail { + +template +__device__ __forceinline__ void warp_copy_bytes(const void* __restrict__ src, void* __restrict__ dst, int64_t num_vecs) { + const int lane = threadIdx.x % device::kWarpThreads; + const auto* s = static_cast(src); + auto* d = static_cast(dst); + for (int64_t i = lane; i < num_vecs; i += device::kWarpThreads) { d[i] = s[i]; } +} + +} // namespace detail + +template +__device__ __forceinline__ void copy_kv_warp(const void* __restrict__ k_src, const void* __restrict__ v_src, + void* __restrict__ k_dst, void* __restrict__ v_dst) { + static_assert(kElementBytes > 0 && kElementBytes % 4 == 0, "Element size must be a positive multiple of 4 bytes"); + + // Pick the widest aligned vector type the element size supports. + if constexpr (kElementBytes % 16 == 0) { + constexpr int64_t N = kElementBytes / 16; + detail::warp_copy_bytes(k_src, k_dst, N); + detail::warp_copy_bytes(v_src, v_dst, N); + } else if constexpr (kElementBytes % 8 == 0) { + constexpr int64_t N = kElementBytes / 8; + detail::warp_copy_bytes(k_src, k_dst, N); + detail::warp_copy_bytes(v_src, v_dst, N); + } else { + constexpr int64_t N = kElementBytes / 4; + detail::warp_copy_bytes(k_src, k_dst, N); + detail::warp_copy_bytes(v_src, v_dst, N); + } +} + +// ─────────────────────────────────────────────────────────────── +// Main kernel +// ─────────────────────────────────────────────────────────────── +// +// Template parameters: +// kElementBytes total bytes per token row (head_num * head_dim * dtype_size) +// kSplit how many warps collaborate on one element (1, 2, or 4) +// kUsePDL whether to emit PDL synchronisation instructions +// T index dtype (int32_t or int64_t) + +template +__global__ void store_kvcache(const __grid_constant__ StoreKVCacheParams params) { + using namespace device; + constexpr auto kSplitSize = kElementBytes / kSplit; + + const uint32_t warp_id = blockIdx.x * kNumWarps + threadIdx.x / kWarpThreads; + const uint32_t item_id = warp_id / kSplit; + const 
uint32_t split_id = warp_id % kSplit; + + const auto& [k_input, v_input, k_cache, v_cache, indices, stride_k, stride_v, stride_cache, stride_indices, batch_size] = + params; + + if (item_id >= batch_size) return; + + const auto index_ptr = static_cast(indices) + item_id * stride_indices; + PDLWaitPrimary(); + + const auto index = *index_ptr; + const auto k_src = pointer::offset(k_input, item_id * stride_k, split_id * kSplitSize); + const auto v_src = pointer::offset(v_input, item_id * stride_v, split_id * kSplitSize); + const auto k_dst = pointer::offset(k_cache, index * stride_cache, split_id * kSplitSize); + const auto v_dst = pointer::offset(v_cache, index * stride_cache, split_id * kSplitSize); + + copy_kv_warp(k_src, v_src, k_dst, v_dst); + PDLTriggerSecondary(); +} + +template +struct StoreKVCacheKernel { + static_assert(kElementBytes > 0 && kElementBytes % 4 == 0); + + template + static constexpr auto store_kernel = store_kvcache; + + template + static auto get_kernel(int num_split) { + using namespace mllm_kernel::host; + if constexpr (kElementBytes % (4 * 128) == 0) { + if (num_split == 4) return store_kernel<4, T>; + } + if constexpr (kElementBytes % (2 * 128) == 0) { + if (num_split == 2) return store_kernel<2, T>; + } + if (num_split == 1) return store_kernel<1, T>; + Panic("Unsupported num_split ", num_split, " for element size ", kElementBytes); + } + + static void run(tvm::ffi::TensorView k, tvm::ffi::TensorView v, tvm::ffi::TensorView k_cache, tvm::ffi::TensorView v_cache, + tvm::ffi::TensorView indices, int num_split) { + using namespace mllm_kernel::host; + + auto B = SymbolicSize{"batch_size"}; + auto D = SymbolicSize{"element_size"}; + auto KS = SymbolicSize{"k_stride"}; + auto VS = SymbolicSize{"v_stride"}; + auto S = SymbolicSize{"cache_stride"}; + auto I = SymbolicSize{"indices_stride"}; + auto dtype = SymbolicDType{}; + auto device = SymbolicDevice{}; + auto indice_dtype = SymbolicDType{}; + device.set_options(); + + // k, v: [B, D] with 
strides [KS, 1] + (void)TensorMatcher({B, D}).with_strides({KS, 1}).with_dtype(dtype).with_device(device).verify(k); + (void)TensorMatcher({B, D}).with_strides({VS, 1}).with_dtype(dtype).with_device(device).verify(v); + + // k_cache, v_cache: [*, D] with strides [S, 1] + (void)TensorMatcher({-1, D}).with_strides({S, 1}).with_dtype(dtype).with_device(device).verify(k_cache).verify(v_cache); + + // indices: [B] with strides [I] + (void)TensorMatcher({B}).with_strides({I}).with_dtype(indice_dtype).with_device(device).verify(indices); + + const int64_t dtype_size = dtype_bytes(dtype.unwrap()); + const uint32_t num_elements = static_cast(B.unwrap()); + RuntimeCheck(kElementBytes == dtype_size * D.unwrap(), "Element size mismatch: expected ", kElementBytes, " but got ", + dtype_size * D.unwrap()); + + const auto params = StoreKVCacheParams{ + .k = k.data_ptr(), + .v = v.data_ptr(), + .k_cache = k_cache.data_ptr(), + .v_cache = v_cache.data_ptr(), + .indices = indices.data_ptr(), + .stride_k_bytes = KS.unwrap() * dtype_size, + .stride_v_bytes = VS.unwrap() * dtype_size, + .stride_cache_bytes = S.unwrap() * dtype_size, + .stride_indices = I.unwrap(), + .batch_size = num_elements, + }; + + const auto use_int32 = indice_dtype.is_type(); + const auto kernel = use_int32 ? 
get_kernel(num_split) : get_kernel(num_split); + const auto num_blocks = div_ceil(num_elements * num_split, kNumWarps); + + LaunchKernel(num_blocks, kThreadsPerBlock, device.unwrap()).enable_pdl(kUsePDL)(kernel, params); + } +}; + +} // namespace diff --git a/mllm-kernel/mllm_kernel/cuda/jit/__init__.py b/mllm-kernel/mllm_kernel/cuda/jit/__init__.py index 696e73ea..202ff3b3 100644 --- a/mllm-kernel/mllm_kernel/cuda/jit/__init__.py +++ b/mllm-kernel/mllm_kernel/cuda/jit/__init__.py @@ -1,3 +1,4 @@ from .add_constant import add_constant +from .store_cache import can_use_store_cache, store_cache -__all__ = ["add_constant"] +__all__ = ["add_constant", "can_use_store_cache", "store_cache"] diff --git a/mllm-kernel/mllm_kernel/cuda/jit/store_cache.py b/mllm-kernel/mllm_kernel/cuda/jit/store_cache.py new file mode 100644 index 00000000..96a73f5e --- /dev/null +++ b/mllm-kernel/mllm_kernel/cuda/jit/store_cache.py @@ -0,0 +1,127 @@ +# Copyright (c) MLLM Team. +# Licensed under the MIT License. +# +# Python interface for the store_cache CUDA kernel. +# Efficiently scatters key/value tensors into a pre-allocated KV cache pool. 
+ +from __future__ import annotations + +import logging +import torch +from mllm_kernel.jit_utils import jit +from mllm_kernel.jit_utils.compile import cache_once, make_cpp_args + + +logger = logging.getLogger(__name__) + + +@cache_once +def _is_arch_support_pdl() -> bool: + if not torch.cuda.is_available(): + return False + major, minor = torch.cuda.get_device_capability() + # PDL requires sm_90a (Hopper) or later + return major > 9 or (major == 9 and minor >= 0) + + +def _make_store_cache_kernel(row_bytes: int): + """Create a JIT-compiled store_cache kernel for the given row_bytes.""" + pdl = _is_arch_support_pdl() + cpp_args = make_cpp_args(row_bytes, pdl) + + @jit( + args=[row_bytes, pdl], + device="cuda", + cuda_files=["store_cache.cuh"], + cpp_wrappers=[], + cuda_wrappers=[ + ("store_cache", f"StoreKVCacheKernel<{cpp_args}>::run"), + ], + func_name="store_cache", + ) + def _kernel( + compiled_module, + k: torch.Tensor, + v: torch.Tensor, + k_cache: torch.Tensor, + v_cache: torch.Tensor, + indices: torch.Tensor, + num_split: int, + ) -> None: + compiled_module.store_cache(k, v, k_cache, v_cache, indices, num_split) + + return _kernel + + +_KERNEL_CACHE: dict[int, object] = {} + + +def _get_kernel(row_bytes: int): + if row_bytes not in _KERNEL_CACHE: + _KERNEL_CACHE[row_bytes] = _make_store_cache_kernel(row_bytes) + return _KERNEL_CACHE[row_bytes] + + +@cache_once +def can_use_store_cache(row_bytes: int) -> bool: + """Check whether the JIT store_cache kernel supports the given row size. + + Returns ``False`` if *row_bytes* is not a multiple of 4 or if the JIT + compilation fails for any reason. 
+ """ + if row_bytes % 4 != 0: + logger.warning( + "Unsupported row_bytes=%d for JIT store_cache kernel: " + "must be multiple of 4", + row_bytes, + ) + return False + try: + _get_kernel(row_bytes) + return True + except Exception as e: + logger.warning( + "Failed to load JIT store_cache kernel with row_bytes=%d: %s", + row_bytes, + e, + ) + return False + + +def store_cache( + k: torch.Tensor, + v: torch.Tensor, + k_cache: torch.Tensor, + v_cache: torch.Tensor, + indices: torch.Tensor, + *, + row_bytes: int = 0, + num_split: int = 0, +) -> None: + """Store key and value tensors into a KV cache at specified indices. + + Each row of *k* (and *v*) is scattered into *k_cache* (and *v_cache*) + at the location given by the corresponding entry in *indices*. + + Args: + k: Key tensor, shape ``(batch_size, head_num * head_dim)``. + v: Value tensor, shape ``(batch_size, head_num * head_dim)``. + k_cache: Key cache, shape ``(num_slots, head_num * head_dim)``. + v_cache: Value cache, shape ``(num_slots, head_num * head_dim)``. + indices: Index tensor, shape ``(batch_size,)``, dtype int32 or int64. + row_bytes: Bytes per row. Auto-detected from *k* when 0. + num_split: Number of warps that cooperate on each element (1, 2, or 4). + When 0 the best value is chosen automatically based on alignment. 
+ """ + row_bytes = row_bytes or k.shape[-1] * k.element_size() + kernel = _get_kernel(row_bytes) + + if num_split <= 0: + if row_bytes % 2048 == 0: + num_split = 4 + elif row_bytes % 1024 == 0: + num_split = 2 + else: + num_split = 1 + + kernel(k, v, k_cache, v_cache, indices, num_split) diff --git a/mllm-kernel/tests/test_store_cache.py b/mllm-kernel/tests/test_store_cache.py new file mode 100644 index 00000000..5e4f1bcc --- /dev/null +++ b/mllm-kernel/tests/test_store_cache.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import pytest +import torch + +from mllm_kernel.cuda.jit import can_use_store_cache, store_cache + + +def _make_inputs( + *, + batch_size: int, + num_slots: int, + row_dim: int, + dtype: torch.dtype, + index_dtype: torch.dtype, + seed: int = 0, +): + torch.manual_seed(seed) + device = "cuda" + k = torch.randn(batch_size, row_dim, device=device, dtype=dtype) + v = torch.randn(batch_size, row_dim, device=device, dtype=dtype) + # Use unique indices to avoid write conflicts on the same cache slot. 
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required")
@pytest.mark.parametrize("dtype", [torch.float16, torch.float32])
@pytest.mark.parametrize("index_dtype", [torch.int32, torch.int64])
def test_store_cache_matches_torch_index(dtype: torch.dtype, index_dtype: torch.dtype):
    """The JIT kernel must write exactly what PyTorch advanced indexing writes."""
    n_rows, n_slots = 257, 4096
    width = 8 * 128  # 1024 elements -> row_bytes=2048 at fp16
    bytes_per_row = width * torch.tensor([], dtype=dtype).element_size()

    assert can_use_store_cache(bytes_per_row), (
        f"store_cache unavailable for row_bytes={bytes_per_row}"
    )

    k, v, k_cache, v_cache, slots = _make_inputs(
        batch_size=n_rows,
        num_slots=n_slots,
        row_dim=width,
        dtype=dtype,
        index_dtype=index_dtype,
        seed=2026,
    )

    # Build the reference result with plain tensor indexing.
    expect_k = k_cache.clone()
    expect_v = v_cache.clone()
    expect_k[slots] = k
    expect_v[slots] = v

    store_cache(k, v, k_cache, v_cache, slots)
    torch.cuda.synchronize()

    assert torch.equal(k_cache, expect_k)
    assert torch.equal(v_cache, expect_v)
@dataclass
class BaseBatchReq:
    """Base class for batched requests identified by a list of request ids."""

    # One id per request in the batch.
    rids: List[str]

    def regenerate_rids(self) -> List[str]:
        """Replace every id with a fresh UUID4 hex string; return the new list."""
        fresh = [uuid.uuid4().hex for _ in self.rids]
        self.rids = fresh
        return fresh
+ is_single: bool = field(default=True, init=False) + batch_size: int = field(default=1, init=False) + + def normalize_batch_and_arguments(self) -> None: + self._validate_inputs() + self._determine_batch_size() + + def _validate_inputs(self) -> None: + has_text = self.text is not None + has_input_ids = self.input_ids is not None + if has_text == has_input_ids: + raise ValueError("Exactly one of `text` or `input_ids` must be provided.") + + def _determine_batch_size(self) -> None: + if self.text is not None: + if isinstance(self.text, str): + self.is_single = True + self.batch_size = 1 + else: + if len(self.text) == 0: + raise ValueError("`text` cannot be an empty list.") + self.is_single = False + self.batch_size = len(self.text) + return + + assert self.input_ids is not None + if len(self.input_ids) == 0: + raise ValueError("`input_ids` cannot be empty.") + if isinstance(self.input_ids[0], int): + self.is_single = True + self.batch_size = 1 + else: + self.is_single = False + self.batch_size = len(self.input_ids) + + def __getitem__(self, i: int) -> "GenerateReqInput": + if i < 0 or i >= self.batch_size: + raise IndexError(f"index {i} out of range for batch size {self.batch_size}") + if self.batch_size == 1: + return self + return GenerateReqInput( + rid=self._pick(self.rid, i), + text=self._pick(self.text, i), + input_ids=self._pick(self.input_ids, i), + sampling_params=self._pick(self.sampling_params, i), + return_logprob=self._pick(self.return_logprob, i), + logprob_start_len=self._pick(self.logprob_start_len, i), + top_logprobs_num=self._pick(self.top_logprobs_num, i), + stream=self.stream, + image_data=self._pick(self.image_data, i), + video_data=self._pick(self.video_data, i), + audio_data=self._pick(self.audio_data, i), + lora_path=self._pick(self.lora_path, i), + session_params=self._pick(self.session_params, i), + extra_options=self.extra_options.copy(), + ) + + @staticmethod + def _pick(value: Any, i: int) -> Any: + if isinstance(value, list): + return 
value[i] + return value + + def to_request_dict(self) -> Dict[str, Any]: + payload: Dict[str, Any] = {} + for key, value in { + "rid": self.rid, + "text": self.text, + "input_ids": self.input_ids, + "sampling_params": self.sampling_params, + "return_logprob": self.return_logprob, + "logprob_start_len": self.logprob_start_len, + "top_logprobs_num": self.top_logprobs_num, + "stream": self.stream, + "image_data": self.image_data, + "video_data": self.video_data, + "audio_data": self.audio_data, + "lora_path": self.lora_path, + "session_params": self.session_params, + }.items(): + if value is not None: + payload[key] = value + payload.update(self.extra_options) + return payload + + +@dataclass +class TokenizedGenerateReqInput(BaseReq): + input_text: str = "" + input_ids: List[int] = field(default_factory=list) + sampling_params: Dict[str, Any] = field(default_factory=dict) + stream: bool = False + return_logprob: bool = False + logprob_start_len: int = -1 + top_logprobs_num: int = 0 + lora_path: Optional[str] = None + session_params: Optional[Dict[str, Any]] = None + + +@dataclass +class BatchTokenizedGenerateReqInput(BaseBatchReq): + reqs: List[TokenizedGenerateReqInput] + + def __len__(self) -> int: + return len(self.reqs) + + def __getitem__(self, i: int) -> TokenizedGenerateReqInput: + return self.reqs[i] + + def __iter__(self) -> Iterator[TokenizedGenerateReqInput]: + return iter(self.reqs) + + +@dataclass +class BatchTokenIDOutput(BaseBatchReq): + finished_reasons: List[Optional[str]] + decode_ids: List[int] + read_offsets: List[int] + output_ids: Optional[List[int]] + skip_special_tokens: List[bool] + prompt_tokens: List[int] + completion_tokens: List[int] + input_token_logprobs_val: List[float] = field(default_factory=list) + input_token_logprobs_idx: List[int] = field(default_factory=list) + output_token_logprobs_val: List[float] = field(default_factory=list) + output_token_logprobs_idx: List[int] = field(default_factory=list) + input_top_logprobs_val: 
@dataclass
class BatchStrOutput(BaseBatchReq):
    """Detokenized (string-level) output for a batch of requests.

    The lists are parallel: index *i* in every list presumably refers to the
    request ``rids[i]`` from :class:`BaseBatchReq` — confirm with the
    detokenizer producer.
    """

    # Finish reason per request; None presumably while still running — TODO confirm.
    finished_reasons: List[Optional[str]]
    # Decoded output text per request.
    output_strs: List[str]
    # Raw output token ids; optional, so callers may omit id reporting.
    output_ids: Optional[List[int]]
    # Per-request token counts.
    prompt_tokens: List[int]
    completion_tokens: List[int]
    # Log-probability payloads; default to empty lists (only filled when
    # logprob reporting was requested — see GenerateReqInput.return_logprob).
    input_token_logprobs_val: List[float] = field(default_factory=list)
    input_token_logprobs_idx: List[int] = field(default_factory=list)
    output_token_logprobs_val: List[float] = field(default_factory=list)
    output_token_logprobs_idx: List[int] = field(default_factory=list)
    input_top_logprobs_val: List[List[float]] = field(default_factory=list)
    input_top_logprobs_idx: List[List[int]] = field(default_factory=list)
    output_top_logprobs_val: List[List[float]] = field(default_factory=list)
    output_top_logprobs_idx: List[List[int]] = field(default_factory=list)
    def _launch_processes(self) -> None:
        """Spawn all subprocess workers and wire up ZMQ IPC channels.

        Pipeline topology (each arrow is a ZMQ IPC socket):
        request/response -> tokenizer -> scheduler <-> model runner,
        scheduler -> detokenizer -> request/response, plus an optional
        scheduler -> async disk-io channel. Each worker signals readiness
        over a one-way multiprocessing Pipe before requests are accepted.
        """
        # "spawn" avoids forking a process that may already hold CUDA state.
        mp.set_start_method("spawn", force=True)
        # PID-derived uid keeps IPC endpoint names unique per engine instance.
        uid = str(os.getpid())

        # IPC addresses for ZMQ communication between processes
        addr_request_response_to_tokenizer: str = make_ipc_address(
            "request_response_to_tokenizer", uid
        )
        addr_tokenizer_to_scheduler: str = make_ipc_address(
            "tokenizer_to_scheduler", uid
        )
        addr_scheduler_to_model_runner: str = make_ipc_address(
            "scheduler_to_model_runner", uid
        )
        addr_model_runner_to_scheduler: str = make_ipc_address(
            "model_runner_to_scheduler", uid
        )
        addr_scheduler_to_detokenizer: str = make_ipc_address(
            "scheduler_to_detokenizer", uid
        )
        addr_detokenizer_to_request_response: str = make_ipc_address(
            "detokenizer_to_request_response", uid
        )
        addr_scheduler_to_disk_io: str = make_ipc_address("scheduler_to_disk_io", uid)

        # Record all subprocesses as (process, readiness-pipe reader, name).
        procs_and_readers: List[tuple] = []

        # Tokenizer
        tokenizer_reader, tokenizer_writer = mp.Pipe(duplex=False)
        tokenizer_proc = mp.Process(
            target=run_tokenizer_process,
            args=(
                addr_request_response_to_tokenizer,
                addr_tokenizer_to_scheduler,
                tokenizer_writer,
            ),
            daemon=True,
        )
        procs_and_readers.append((tokenizer_proc, tokenizer_reader, "tokenizer"))

        # Scheduler
        scheduler_reader, scheduler_writer = mp.Pipe(duplex=False)
        scheduler_proc = mp.Process(
            target=run_scheduler_process,
            args=(
                addr_tokenizer_to_scheduler,
                addr_scheduler_to_model_runner,
                addr_model_runner_to_scheduler,
                addr_scheduler_to_detokenizer,
                scheduler_writer,
            ),
            daemon=True,
        )
        procs_and_readers.append((scheduler_proc, scheduler_reader, "scheduler"))

        # Model Runner
        model_runner_reader, model_runner_writer = mp.Pipe(duplex=False)
        model_runner_proc = mp.Process(
            target=run_model_runner_process,
            args=(
                addr_scheduler_to_model_runner,
                addr_model_runner_to_scheduler,
                model_runner_writer,
            ),
            daemon=True,
        )
        procs_and_readers.append(
            (model_runner_proc, model_runner_reader, "model_runner")
        )

        # Detokenizer
        detokenizer_reader, detokenizer_writer = mp.Pipe(duplex=False)
        detokenizer_proc = mp.Process(
            target=run_detokenizer_process,
            args=(
                addr_scheduler_to_detokenizer,
                addr_detokenizer_to_request_response,
                detokenizer_writer,
            ),
            daemon=True,
        )
        procs_and_readers.append((detokenizer_proc, detokenizer_reader, "detokenizer"))

        # Async Disk I/O (optional, gated by server config)
        if get_global_config().server.enable_disk_io_async:
            disk_io_reader, disk_io_writer = mp.Pipe(duplex=False)
            disk_io_proc = mp.Process(
                target=run_async_disk_io_process,
                args=(addr_scheduler_to_disk_io, disk_io_writer),
                daemon=True,
            )
            procs_and_readers.append((disk_io_proc, disk_io_reader, "async_disk_io"))

        # Start all subprocesses before waiting, so they initialise in parallel.
        for proc, _, name in procs_and_readers:
            proc.start()
            self._subprocesses.append(proc)
            logger.info("Started %s process (pid=%s)", name, proc.pid)

        # Wait for readiness signals.
        # NOTE(review): reader.recv() blocks with no timeout — a hung worker
        # stalls startup forever; consider Pipe.poll(timeout) here.
        for _, reader, name in procs_and_readers:
            try:
                msg = reader.recv()
            except EOFError:
                # The writer end was closed without a message: the child died.
                raise RuntimeError(f"{name} process died before signalling readiness")
            if msg.get("status") != "ready":
                raise RuntimeError(f"{name} process failed to initialise: {msg}")
            logger.info("%s process ready", name)

        # RR Process is current main process (no subprocess for it).
        self._rr_process = RequestResponseProcess(
            send_to_tokenizer_addr=addr_request_response_to_tokenizer,
            recv_from_detokenizer_addr=addr_detokenizer_to_request_response,
        )

        # Reuse the caller's running loop when present; otherwise own a new one
        # (sync callers like generate() drive it via run_until_complete).
        try:
            self._loop = asyncio.get_running_loop()
        except RuntimeError:
            self._loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self._loop)

        self._rr_process.start(self._loop)
        logger.info("RequestResponseProcess started in main process")
+ video_data=video_data, + lora_path=lora_path, + session_params=session_params, + extra_options=kwargs, + ) + request.normalize_batch_and_arguments() + + async def _run() -> Dict[str, Any]: + state = await self._rr_process.add_request(request) + if isinstance(rid, list): + raise ValueError("Synchronous `generate` currently supports single request.") + return await self._wait_for_final_result(rid, state) + + return self._loop.run_until_complete(_run()) + + async def generate_async( + self, + prompt: Optional[Union[List[str], str]] = None, + sampling_params: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None, + input_ids: Optional[Union[List[List[int]], List[int]]] = None, + image_data: Optional[Any] = None, + audio_data: Optional[Any] = None, + video_data: Optional[Any] = None, + return_logprob: Optional[Union[List[bool], bool]] = None, + logprob_start_len: Optional[Union[List[int], int]] = None, + top_logprobs_num: Optional[Union[List[int], int]] = None, + lora_path: Optional[Union[List[Optional[str]], str]] = None, + session_params: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None, + stream: bool = False, + rid: Optional[Union[List[str], str]] = None, + **kwargs, + ) -> AsyncIterator[Dict[str, Any]]: + """Asynchronous generation entry point. + + When *stream* is ``False`` (default) the returned async iterator + yields a **single** final result dict. When *stream* is ``True`` + every incremental chunk from the detokenizer is yielded as it + arrives, following the ``Event + out_list`` pattern. 
""" + if rid is None: + rid = uuid.uuid4().hex + request = GenerateReqInput( + rid=rid, + text=prompt, + input_ids=input_ids, + sampling_params=sampling_params, + return_logprob=return_logprob, + logprob_start_len=logprob_start_len, + top_logprobs_num=top_logprobs_num, + stream=stream, + image_data=image_data, + audio_data=audio_data, + video_data=video_data, + lora_path=lora_path, + session_params=session_params, + extra_options=kwargs, + ) + request.normalize_batch_and_arguments() + state = await self._rr_process.add_request(request) + + try: + if isinstance(rid, list): + raise ValueError("`generate_async` currently supports single request only.") + if stream: + async for chunk in self._stream_results(rid, state): + yield chunk + else: + yield await self._wait_for_final_result(rid, state) + finally: + self._rr_process.remove_state(rid) + + @staticmethod + async def _wait_for_final_result(rid: str, state: ReqState) -> Dict[str, Any]: + """Block until the request is finished and return the last output.""" + while True: + await state.event.wait() + if state.finished: + return state.out_list[-1] + state.event.clear() + + @staticmethod + async def _stream_results( + rid: str, state: ReqState + ) -> AsyncIterator[Dict[str, Any]]: + """Yield incremental chunks as they arrive, until finished.""" + while True: + await state.event.wait() + for item in state.out_list: + yield item + state.out_list.clear() + if state.finished: + return + state.event.clear() - # RR process is the main process - self._rr_process = RequestResponseProcess() + def shutdown(self) -> None: + """Terminate all subprocesses.""" + if self._rr_process is not None: + try: + self._loop.run_until_complete(self._rr_process.shutdown()) + except Exception: + pass + for proc in self._subprocesses: + if proc.is_alive(): + proc.terminate() + proc.join(timeout=5) + if proc.is_alive(): + proc.kill() + self._subprocesses.clear() + logger.info("All subprocesses shut down") def _set_default_torch_dtype(self): """Set 
the default torch dtype based on the server configuration.""" @@ -93,23 +365,13 @@ def _check_model_and_tokenizer(self): @staticmethod def _maybe_download(path: Path, download_dir: Optional[Path] = None) -> Path: - """Return a local directory for *path*, downloading if necessary.""" if path.is_dir(): return path - repo_id = str(path) logger.info("Downloading '%s' ...", repo_id) - kwargs = {} if download_dir is not None: kwargs["local_dir"] = str(download_dir / path.name) - downloaded = snapshot_download(repo_id=repo_id, **kwargs) logger.info("Downloaded '%s' to '%s'", repo_id, downloaded) return Path(downloaded) - - def generate(self, stream: bool = True): - pass - - async def generate_async(self, stream: bool = True): - pass diff --git a/pymllm/mem_cache/__init__.py b/pymllm/mem_cache/__init__.py index e69de29b..c2ce06eb 100644 --- a/pymllm/mem_cache/__init__.py +++ b/pymllm/mem_cache/__init__.py @@ -0,0 +1,37 @@ +from pymllm.mem_cache.memory_pool import ( + KVPool, + ReqToTokenPool, + TokenToKVPoolAllocator, + make_full_attention_net_mem_pool, + make_req_to_token_pool, +) +from pymllm.mem_cache.radix_cache import ( + EvictResult, + InsertResult, + MatchResult, + RadixCache, + RadixKey, + TreeNode, + hash_bytes, + hash_to_int64, + hash_token_ids, +) + +__all__ = [ + # memory_pool + "KVPool", + "TokenToKVPoolAllocator", + "ReqToTokenPool", + "make_full_attention_net_mem_pool", + "make_req_to_token_pool", + # radix_cache + "RadixCache", + "RadixKey", + "TreeNode", + "MatchResult", + "InsertResult", + "EvictResult", + "hash_token_ids", + "hash_to_int64", + "hash_bytes", +] diff --git a/pymllm/mem_cache/memory_pool.py b/pymllm/mem_cache/memory_pool.py new file mode 100644 index 00000000..0721fd71 --- /dev/null +++ b/pymllm/mem_cache/memory_pool.py @@ -0,0 +1,480 @@ +"""Lightweight KV-cache memory pools + +Three-layer architecture:: + + ReqToTokenPool maps (req_slot, position) → kv_index + TokenToKVPoolAllocator manages a free-list of integer indices + KVPool holds the 
class KVPool:
    """GPU (or CPU) storage for per-layer key and value caches.

    Layout per layer::

        JIT:
            k_buffer[layer][slot, k_head_num * k_head_dim]
            v_buffer[layer][slot, v_head_num * v_head_dim]

        PyTorch:
            k_buffer[layer][slot, k_head_num, k_head_dim]
            v_buffer[layer][slot, v_head_num, v_head_dim]

    K and V may have **independent** head counts and head dimensions, which
    covers standard MHA, GQA / MQA, and architectures like MLA where value
    projection uses a different dimensionality.

    ``size`` usable slots are numbered ``[1, size]``. Slot 0 is a dummy
    padding slot that absorbs writes from padded tokens.

    Parameters
    ----------
    size : int
        Number of usable token slots (total buffer length = ``size + 1``).
    layer_num : int
        Number of transformer layers (one K buffer + one V buffer per layer).
    k_head_num : int
        Number of key heads.
    k_head_dim : int
        Dimension of each key head.
    device : str | torch.device
        Target device (``"cuda"``, ``"cpu"``, …).
    dtype : torch.dtype
        Storage data type.
    v_head_num : int, optional
        Number of value heads. Defaults to *k_head_num*.
    v_head_dim : int, optional
        Dimension of each value head. Defaults to *k_head_dim*.
    pin_memory : bool, optional
        Whether to use pinned (page-locked) memory. Only applied when the
        pool lives on the CPU; ignored for other devices. Defaults to True.
    """

    def __init__(
        self,
        size: int,
        layer_num: int,
        k_head_num: int,
        k_head_dim: int,
        device: Union[str, torch.device] = "cuda",
        dtype: torch.dtype = torch.float16,
        v_head_num: Optional[int] = None,
        v_head_dim: Optional[int] = None,
        pin_memory: bool = True,
    ):
        self.size = size
        self.layer_num = layer_num
        self.k_head_num = k_head_num
        self.k_head_dim = k_head_dim
        self.v_head_num = v_head_num if v_head_num is not None else k_head_num
        self.v_head_dim = v_head_dim if v_head_dim is not None else k_head_dim
        self.device = torch.device(device)
        self.dtype = dtype

        buf_len = size + 1  # slot 0 is padding

        if buf_len % 8 != 0:
            logger.warning(
                "KVPool buffer length is not divisible by 8, padding to the next multiple of 8"
            )
            buf_len = (buf_len + 7) & ~7  # round up to a multiple of 8

        k_row_dim = self.k_head_num * self.k_head_dim
        v_row_dim = self.v_head_num * self.v_head_dim
        self._same_kv_dim = k_row_dim == v_row_dim
        self._row_bytes = k_row_dim * torch.tensor([], dtype=dtype).element_size()
        # The fused JIT kernel requires CUDA, identical K/V row widths, and a
        # row size the kernel supports.
        self._use_jit = (
            self.device.type == "cuda"
            and self._same_kv_dim
            and can_use_store_cache(self._row_bytes)
        )
        if not self._use_jit:
            logger.warning(
                f"Fallback to PyTorch index for KVPool, which is slower than the mllm-kernel's implementation, same_kv_dim={self._same_kv_dim}, row_bytes={self._row_bytes}"
            )

        # Bug fix: torch can only pin host (CPU) memory. With the previous
        # unconditional pin_memory=pin_memory, the default configuration
        # (device="cuda", pin_memory=True) raised a RuntimeError from
        # torch.zeros. Pin only when the buffers actually live on the CPU.
        pin = pin_memory and self.device.type == "cpu"

        self.k_buffer: List[torch.Tensor] = [
            torch.zeros(
                (buf_len, self.k_head_num, self.k_head_dim),
                dtype=dtype,
                device=self.device,
                pin_memory=pin,
            )
            for _ in range(layer_num)
        ]
        self.v_buffer: List[torch.Tensor] = [
            torch.zeros(
                (buf_len, self.v_head_num, self.v_head_dim),
                dtype=dtype,
                device=self.device,
                pin_memory=pin,
            )
            for _ in range(layer_num)
        ]

        # Pre-computed 2D views for the JIT store_cache kernel.
        # Zero-copy: same underlying storage as k_buffer / v_buffer.
        if self._use_jit:
            self._k_buffer_2d = [b.view(buf_len, -1) for b in self.k_buffer]
            self._v_buffer_2d = [b.view(buf_len, -1) for b in self.v_buffer]

        logger.info(
            "KVPool allocated: %d layers, %d slots, K=[%d,%d] V=[%d,%d], %.2f GB",
            layer_num,
            size,
            self.k_head_num,
            self.k_head_dim,
            self.v_head_num,
            self.v_head_dim,
            self._mem_bytes() / (1 << 30),
        )

    def get_key_buffer(self, layer_id: int) -> torch.Tensor:
        """Return the 3-D key buffer of *layer_id*."""
        return self.k_buffer[layer_id]

    def get_value_buffer(self, layer_id: int) -> torch.Tensor:
        """Return the 3-D value buffer of *layer_id*."""
        return self.v_buffer[layer_id]

    def get_kv_buffer(self, layer_id: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return ``(k_buffer, v_buffer)`` of *layer_id*."""
        return self.k_buffer[layer_id], self.v_buffer[layer_id]

    def set_kv_buffer(
        self,
        layer_id: int,
        indices: torch.Tensor,
        k: torch.Tensor,
        v: torch.Tensor,
    ) -> None:
        """Write K/V vectors into the cache at the given *indices*.

        ``k`` / ``v`` can be any shape as long as the trailing dimensions
        multiply to ``head_num * head_dim`` (the row dimension). All leading
        dimensions are treated as the batch axis and must match ``indices``
        after flattening. Typical shapes::

            k: [num_tokens, head_num, head_dim]     indices: [num_tokens]
            k: [batch, seq_len, head_num, head_dim] indices: [batch, seq_len]
            k: [num_tokens, head_num * head_dim]    indices: [num_tokens]
        """
        if self._use_jit:
            row_dim = self.k_head_num * self.k_head_dim
            store_cache(
                k.reshape(-1, row_dim),
                v.reshape(-1, row_dim),
                self._k_buffer_2d[layer_id],
                self._v_buffer_2d[layer_id],
                indices.reshape(-1),
                row_bytes=self._row_bytes,
            )
        else:
            # Fallback: PyTorch advanced-indexing scatter.
            self.k_buffer[layer_id][indices] = k
            self.v_buffer[layer_id][indices] = v

    def _mem_bytes(self) -> int:
        """Total bytes held by all K and V buffers."""
        total = 0
        for buf in self.k_buffer + self.v_buffer:
            total += buf.nelement() * buf.element_size()
        return total
    def __init__(
        self,
        size: int,
        device: Union[str, torch.device] = "cuda",
        page_size: int = 1,
        need_sort: bool = True,
    ):
        # Total number of allocatable token slots; must match KVPool.size.
        self.size = size
        # page_size > 1 switches the allocator into page-aligned mode.
        self.page_size = page_size
        self.device = torch.device(device)
        # When True, merge_and_sort_free() keeps the free-list sorted so
        # lower indices are handed out first (better memory locality).
        self.need_sort = need_sort
        # clear() creates free_slots / release_slots and the batch-free state.
        self.clear()
    def alloc(self, need_size: int) -> Optional[torch.Tensor]:
        """Allocate *need_size* token indices.

        Returns a 1-D ``int64`` tensor on success, or ``None`` if the pool is
        exhausted.
        """
        if self.page_size == 1:
            # Lazy merge: fold the deferred release list into free_slots only
            # when free_slots alone cannot satisfy the request.
            if need_size > len(self.free_slots):
                self.merge_and_sort_free()
            if need_size > len(self.free_slots):
                return None
            out = self.free_slots[:need_size]
            self.free_slots = self.free_slots[need_size:]
            return out

        # Page-aligned mode: free_slots holds *page ids*, not token indices.
        num_pages = (need_size + self.page_size - 1) // self.page_size  # ceil div
        if num_pages > len(self.free_slots):
            self.merge_and_sort_free()
        if num_pages > len(self.free_slots):
            return None
        pages = self.free_slots[:num_pages]
        self.free_slots = self.free_slots[num_pages:]
        # Expand each page id into its page_size consecutive token indices.
        offsets = torch.arange(self.page_size, device=self.device)
        out = (pages[:, None] * self.page_size + offsets).reshape(-1)
        # NOTE(review): page ids start at 1 (see clear()), so token indices run
        # from page_size up to about size + page_size - 1, which can exceed a
        # KV buffer of length size + 1 when page_size > 1 — confirm intended
        # buffer sizing for paged mode. The truncated tail of the last page is
        # recoverable because free() deduplicates indices by page.
        return out[:need_size]
+ + Typical usage:: + + pool = ReqToTokenPool(max_reqs=256, max_context_len=4096) + + # --- on new request arrival --- + [slot] = pool.alloc(1) # slot = req_pool_idx + kv_indices = kv_allocator.alloc(seq_len) # from TokenToKVPoolAllocator + pool.write((slot, slice(0, seq_len)), kv_indices) + + # --- read back (caller tracks seq_len) --- + kv_indices = pool.req_to_token[slot, :seq_len] + + # --- on request completion --- + kv_allocator.free(pool.req_to_token[slot, :seq_len]) + pool.free(slot) + + Parameters + ---------- + max_reqs : int + Maximum number of concurrent requests (number of rows). + max_context_len : int + Maximum sequence length any single request can reach (number of cols). + device : str | torch.device + Target device for the mapping tensor. + """ + + def __init__( + self, + max_reqs: int, + max_context_len: int, + device: Union[str, torch.device] = "cuda", + ): + self.size = max_reqs + self.max_context_len = max_context_len + self.device = torch.device(device) + + self.req_to_token = torch.zeros( + (max_reqs, max_context_len), dtype=torch.int64, device=self.device + ) + self._free_slots: List[int] = list(range(max_reqs)) + + def available_size(self) -> int: + return len(self._free_slots) + + def alloc(self, n: int = 1) -> Optional[List[int]]: + """Allocate *n* request slots. Returns a list of slot indices.""" + if n > len(self._free_slots): + return None + out = self._free_slots[:n] + self._free_slots = self._free_slots[n:] + return out + + def free(self, slot: int) -> None: + """Return a single request slot to the pool.""" + self._free_slots.append(slot) + + def write(self, index: Tuple, values: torch.Tensor) -> None: + """Write KV indices into the mapping table. + + ``index`` is typically ``(req_pool_idx, slice(start, end))``. 
+ """ + self.req_to_token[index] = values + + def clear(self) -> None: + self._free_slots = list(range(self.size)) + self.req_to_token.zero_() + + +def make_full_attention_net_mem_pool( + size: int, + layer_num: int, + k_head_num: int, + k_head_dim: int, + v_head_num: int, + v_head_dim: int, + device: Union[str, torch.device] = "cuda", + dtype: torch.dtype = torch.float16, + page_size: int = 1, + need_sort: bool = True, + pin_memory: bool = True, +) -> Tuple[KVPool, TokenToKVPoolAllocator]: + """Create a :class:`KVPool` and its :class:`TokenToKVPoolAllocator` for a + full-attention (non-SWA) model. + + Parameters + ---------- + size : int + Number of usable token slots in the KV cache. + layer_num : int + Number of transformer layers. + k_head_num / k_head_dim : int + Key head count and dimension. + v_head_num / v_head_dim : int + Value head count and dimension. + device : str | torch.device + Target device. + dtype : torch.dtype + Storage data type for the KV buffers. + page_size : int + Allocator page size (1 = per-token, >1 = page-aligned). + need_sort : bool + Whether the allocator sorts on merge for memory locality. + pin_memory : bool + Whether to use pinned memory for the KV buffers. 
+ + Returns + ------- + (KVPool, TokenToKVPoolAllocator) + """ + pool = KVPool( + size=size, + layer_num=layer_num, + k_head_num=k_head_num, + k_head_dim=k_head_dim, + device=device, + dtype=dtype, + v_head_num=v_head_num, + v_head_dim=v_head_dim, + pin_memory=pin_memory, + ) + allocator = TokenToKVPoolAllocator( + size=size, + device=device, + page_size=page_size, + need_sort=need_sort, + ) + return pool, allocator + + +def make_req_to_token_pool( + max_reqs: int, + max_context_len: int, + device: Union[str, torch.device] = "cuda", +) -> ReqToTokenPool: + return ReqToTokenPool(max_reqs, max_context_len, device) diff --git a/pymllm/mem_cache/param_disk_cache.py b/pymllm/mem_cache/param_disk_cache.py deleted file mode 100644 index e69de29b..00000000 diff --git a/pymllm/mem_cache/radix_cache.py b/pymllm/mem_cache/radix_cache.py index e69de29b..997790ea 100644 --- a/pymllm/mem_cache/radix_cache.py +++ b/pymllm/mem_cache/radix_cache.py @@ -0,0 +1,794 @@ +"""Lightweight radix-tree KV cache with SWA and multimodal support. + + +Supports: + - Multi-batch serving on a single GPU + - Sliding Window Attention (SWA) via tombstone mechanism + - Multimodal namespace isolation via ``extra_key`` + - SHA256 position-aware hashing + - Page-aligned operations (page_size >= 1) + - LRU leaf eviction +""" + +from __future__ import annotations + +import hashlib +import heapq +import logging +import time +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union + +import torch + +logger = logging.getLogger(__name__) + + +def hash_token_ids( + token_ids: List[Union[int, Tuple[int, ...]]], + prior_hash: Optional[str] = None, +) -> str: + """SHA-256 hash of a token-id page with optional chain-hash. + + Each token is encoded as a 4-byte little-endian unsigned integer; + tuples (bigram / EAGLE) hash each element in order. 
When *prior_hash* + is supplied the digest is seeded with the raw bytes of the previous + hash, making the result position-aware. + """ + hasher = hashlib.sha256() + if prior_hash: + hasher.update(bytes.fromhex(prior_hash)) + for t in token_ids: + if isinstance(t, tuple): + for elem in t: + hasher.update(elem.to_bytes(4, byteorder="little", signed=False)) + else: + hasher.update(t.to_bytes(4, byteorder="little", signed=False)) + return hasher.hexdigest() + + +def hash_to_int64(hex_str: str) -> int: + """Convert a hex digest to a signed 64-bit integer (first 16 hex chars).""" + val = int(hex_str[:16], 16) + return val - (1 << 64) if val >= (1 << 63) else val + + +def hash_bytes(data: bytes) -> int: + """SHA-256 → unsigned 64-bit int. Useful for multimodal embedding keys.""" + return int.from_bytes(hashlib.sha256(data).digest()[:8], "big", signed=False) + + +class RadixKey: + """Compound lookup key: token-id sequence + optional namespace tag. + + ``extra_key`` isolates independent namespaces so that sequences with + identical leading tokens but different adapters / LoRA ids / multimodal + context hashes never share prefix nodes. + """ + + __slots__ = ("token_ids", "extra_key") + + def __init__( + self, + token_ids: List[Union[int, Tuple[int, ...]]], + extra_key: Optional[str] = None, + ): + self.token_ids = token_ids + self.extra_key = extra_key + + def __len__(self) -> int: + return len(self.token_ids) + + def __iter__(self) -> Iterator: + return iter(self.token_ids) + + def __getitem__(self, idx: Union[int, slice]) -> RadixKey: + if isinstance(idx, slice): + return RadixKey(self.token_ids[idx], self.extra_key) + return RadixKey([self.token_ids[idx]], self.extra_key) + + def __repr__(self) -> str: + preview = self.token_ids[:10] + tail = "..." 
if len(self.token_ids) > 10 else "" + return f"RadixKey(extra={self.extra_key!r}, toks={preview}{tail})" + + +_node_counter: int = 0 + + +def _next_node_id() -> int: + global _node_counter + _node_counter += 1 + return _node_counter + + +class TreeNode: + """A single node in the radix tree. + + ``value`` holds a 1-D ``int64`` tensor of KV-pool indices (one per token + in ``key``). When the node has been evicted, ``value`` is ``None``. + """ + + __slots__ = ( + "children", + "parent", + "key", + "value", + "lock_ref", + "swa_lock_ref", + "swa_tombstone", + "swa_boundary_id", + "last_access_time", + "hit_count", + "hash_values", + "id", + ) + + def __init__(self) -> None: + self.children: Dict[Any, TreeNode] = defaultdict(TreeNode) + self.parent: Optional[TreeNode] = None + self.key: Optional[RadixKey] = None + self.value: Optional[torch.Tensor] = None + + self.lock_ref: int = 0 + self.swa_lock_ref: int = 0 + self.swa_tombstone: bool = False + self.swa_boundary_id: Optional[int] = None + + self.last_access_time: float = time.monotonic() + self.hit_count: int = 0 + self.hash_values: Optional[List[str]] = None + self.id: int = _next_node_id() + + @property + def evicted(self) -> bool: + return self.value is None + + def __lt__(self, other: TreeNode) -> bool: + return self.last_access_time < other.last_access_time + + +def _key_match(key0: RadixKey, key1: RadixKey, page_size: int) -> int: + """Return the length of the common prefix (page-aligned when *page_size* > 1).""" + if key0.extra_key != key1.extra_key: + return 0 + if page_size == 1: + i = 0 + for a, b in zip(key0.token_ids, key1.token_ids): + if a != b: + break + i += 1 + return i + min_len = min(len(key0), len(key1)) + i = 0 + while i < min_len: + if key0.token_ids[i : i + page_size] != key1.token_ids[i : i + page_size]: + break + i += page_size + return i + + +def _child_key(key: RadixKey, page_size: int) -> Any: + """Derive the dict key used in ``node.children``.""" + plain = key.token_ids[0] if page_size == 
1 else tuple(key.token_ids[:page_size]) + return (key.extra_key, plain) if key.extra_key is not None else plain + + +@dataclass +class MatchResult: + """Returned by :meth:`RadixCache.match_prefix`.""" + + indices: torch.Tensor + last_node: TreeNode + prefix_len: int = 0 + + +@dataclass +class InsertResult: + """Returned by :meth:`RadixCache.insert`.""" + + prefix_len: int = 0 + + +@dataclass +class EvictResult: + """Returned by :meth:`RadixCache.evict`.""" + + full_evicted: int = 0 + swa_evicted: int = 0 + + +class RadixCache: + """Lightweight radix tree for KV-cache prefix sharing. + + Parameters + ---------- + page_size: + Number of tokens per KV-pool page. Keys and values are aligned to + this granularity. + sliding_window_size: + If set, enables SWA mode. The cache tracks which nodes have had + their SWA KV freed (tombstoned) and constrains prefix matching + so that the sliding-window invariant is maintained. + disable: + When *True* every public method is a no-op (useful for ablation). + token_to_kv_pool_allocator: + Optional pool allocator with ``free(indices)`` (and ``free_swa`` for + SWA mode). When *None*, index tensors are simply discarded. 
+ """ + + def __init__( + self, + page_size: int = 1, + sliding_window_size: Optional[int] = None, + disable: bool = False, + token_to_kv_pool_allocator: Any = None, + ): + self.page_size = page_size + self.sliding_window_size = sliding_window_size + self.disable = disable + self.pool = token_to_kv_pool_allocator + + if self.pool is not None and hasattr(self.pool, "device"): + self.device = self.pool.device + else: + self.device = torch.device("cpu") + + self._swa_boundary_counter: int = 0 + self.reset() + + @property + def supports_swa(self) -> bool: + return self.sliding_window_size is not None + + def evictable_size(self) -> int: + return self._evictable_size + + def swa_evictable_size(self) -> int: + return self._swa_evictable_size + + def protected_size(self) -> int: + return self._protected_size + + def swa_protected_size(self) -> int: + return self._swa_protected_size + + def reset(self) -> None: + """Clear all cached state and re-initialise the root node.""" + self.root_node = TreeNode() + self.root_node.key = RadixKey([]) + self.root_node.value = torch.tensor([], dtype=torch.int64) + self.root_node.lock_ref = 1 + self.root_node.swa_lock_ref = 1 + self._evictable_size: int = 0 + self._swa_evictable_size: int = 0 + self._protected_size: int = 0 + self._swa_protected_size: int = 0 + + def match_prefix(self, key: RadixKey) -> MatchResult: + """Find the longest cached prefix of *key*. + + For SWA mode the match is further constrained: the path from the + returned ``last_node`` to root must have at least + ``sliding_window_size`` non-tombstone tokens (or be entirely + tombstone-free back to root). + + Accessing a prefix refreshes LRU timestamps along the matched path. 
+ """ + empty = MatchResult( + indices=torch.empty(0, dtype=torch.int64, device=self.device), + last_node=self.root_node, + ) + if self.disable or len(key) == 0: + return empty + + key = self._page_align_key(key) + if len(key) == 0: + return empty + + if self.supports_swa: + values, last_node, best_count = self._match_swa(key) + values = values[:best_count] + else: + values, last_node = self._match_normal(key) + + cat = ( + torch.cat(values) + if values + else torch.empty(0, dtype=torch.int64, device=self.device) + ) + return MatchResult(indices=cat, last_node=last_node, prefix_len=len(cat)) + + def insert( + self, + key: RadixKey, + value: Optional[torch.Tensor] = None, + *, + prev_prefix_len: int = 0, + swa_evicted_seqlen: int = 0, + ) -> InsertResult: + """Insert *key*/*value* into the tree. + + Returns how many leading tokens were already present (the prefix + length). The caller is responsible for freeing duplicate KV indices + in the range ``[cache_protected_len, prefix_len)``. + + Parameters + ---------- + prev_prefix_len: + (SWA mode) tokens before this offset are already protected and + should not have their values overwritten. + swa_evicted_seqlen: + (SWA mode) the sequence length up to which SWA KV has been + previously evicted. Used to decide whether a tombstoned node can + be un-tombstoned with the incoming value. + """ + if self.disable: + return InsertResult() + if value is None: + value = torch.tensor(key.token_ids, dtype=torch.int64) + if self.supports_swa: + plen = self._insert_swa( + self.root_node, key, value, prev_prefix_len, swa_evicted_seqlen + ) + else: + plen = self._insert_normal(self.root_node, key, value) + return InsertResult(prefix_len=plen) + + def evict(self, num_tokens: int, swa_num_tokens: int = 0) -> EvictResult: + """Evict up to *num_tokens* (full) and *swa_num_tokens* (SWA) tokens. + + Full eviction removes leaf nodes entirely; SWA eviction tombstones + internal nodes (freeing SWA KV but retaining full-attn KV). 
+ """ + if self.disable: + return EvictResult() + + full_evicted = 0 + swa_evicted = 0 + + # Phase 1: full leaf eviction + if num_tokens > 0: + leaves = self._collect_evictable_leaves() + heap: List[Tuple[float, TreeNode]] = [ + (n.last_access_time, n) for n in leaves + ] + heapq.heapify(heap) + + while full_evicted < num_tokens and heap: + _, node = heapq.heappop(heap) + if node.evicted or node.lock_ref > 0: + continue + n = len(node.value) + self._free_indices(node.value) + full_evicted += n + swa_evicted += n + self._delete_leaf(node) + + p = node.parent + if ( + p is not None + and p != self.root_node + and len(p.children) == 0 + and p.lock_ref == 0 + ): + if self.supports_swa and p.swa_tombstone: + self._free_indices(p.value) + full_evicted += len(p.value) + self._delete_leaf(p) + else: + heapq.heappush(heap, (p.last_access_time, p)) + + # Phase 2: SWA tombstone eviction (internal nodes) + if self.supports_swa and swa_evicted < swa_num_tokens: + candidates = self._collect_swa_evictable() + heap2: List[Tuple[float, TreeNode]] = [ + (n.last_access_time, n) for n in candidates + ] + heapq.heapify(heap2) + + while swa_evicted < swa_num_tokens and heap2: + _, node = heapq.heappop(heap2) + if node.swa_tombstone or node.swa_lock_ref > 0 or node.evicted: + continue + n = len(node.value) + if len(node.children) == 0 and node.lock_ref == 0: + self._free_indices(node.value) + full_evicted += n + swa_evicted += n + self._delete_leaf(node) + elif len(node.children) > 0: + self._free_swa_indices(node.value) + swa_evicted += n + self._tombstone_node(node) + + return EvictResult(full_evicted=full_evicted, swa_evicted=swa_evicted) + + def inc_lock_ref(self, node: TreeNode) -> Optional[int]: + """Lock nodes from *node* up to root (prevents eviction). + + Returns ``swa_boundary_id`` that must be passed back to + :meth:`dec_lock_ref`. In non-SWA mode, returns ``None``. 
+ """ + if self.disable or node is None: + return None + + swa_locked = 0 + swa_boundary_id: Optional[int] = None + cur = node + while cur != self.root_node: + if cur.lock_ref == 0: + self._evictable_size -= len(cur.key) + self._protected_size += len(cur.key) + cur.lock_ref += 1 + + if ( + self.supports_swa + and swa_locked < self.sliding_window_size + and not cur.swa_tombstone + ): + if cur.swa_lock_ref == 0: + self._swa_evictable_size -= len(cur.key) + self._swa_protected_size += len(cur.key) + cur.swa_lock_ref += 1 + swa_locked += len(cur.key) + if swa_locked >= self.sliding_window_size: + if cur.swa_boundary_id is None: + self._swa_boundary_counter += 1 + cur.swa_boundary_id = self._swa_boundary_counter + swa_boundary_id = cur.swa_boundary_id + + cur = cur.parent + return swa_boundary_id + + def dec_lock_ref( + self, node: TreeNode, swa_boundary_id: Optional[int] = None + ) -> None: + """Unlock nodes from *node* up to root.""" + if self.disable or node is None: + return + + dec_swa = True + cur = node + while cur != self.root_node: + if cur.lock_ref == 1: + self._evictable_size += len(cur.key) + self._protected_size -= len(cur.key) + cur.lock_ref -= 1 + + if self.supports_swa and dec_swa and not cur.swa_tombstone: + if cur.swa_lock_ref == 1: + self._swa_evictable_size += len(cur.key) + self._swa_protected_size -= len(cur.key) + cur.swa_lock_ref -= 1 + if swa_boundary_id and cur.swa_boundary_id == swa_boundary_id: + dec_swa = False + + cur = cur.parent + + def total_size(self) -> int: + """Total number of cached tokens (including tombstoned).""" + total = 0 + stack: List[TreeNode] = [self.root_node] + while stack: + n = stack.pop() + if n.value is not None: + total += len(n.value) + stack.extend(c for c in n.children.values() if not c.evicted) + return total + + def compute_node_hash(self, node: TreeNode) -> List[str]: + """Compute position-aware SHA-256 hashes for *node* (one per page). + + Lazily computed and cached on ``node.hash_values``. 
+ """ + if node.hash_values is not None: + return node.hash_values + + parent_hash: Optional[str] = None + if ( + node.parent is not None + and node.parent.hash_values is not None + and len(node.parent.key) > 0 + and len(node.parent.hash_values) > 0 + ): + parent_hash = node.parent.hash_values[-1] + + hashes: List[str] = [] + for start in range(0, len(node.key), self.page_size): + page = node.key.token_ids[start : start + self.page_size] + if not page: + continue + h = hash_token_ids(page, prior_hash=parent_hash) + hashes.append(h) + parent_hash = h + + node.hash_values = hashes + return hashes + + def pretty_print(self) -> None: + """Print the tree structure to stdout.""" + self._print_helper(self.root_node, 0) + print( + f"total={self.total_size()} evictable={self._evictable_size}" + + ( + f" swa_evictable={self._swa_evictable_size}" + if self.supports_swa + else "" + ) + ) + + def _match_normal(self, key: RadixKey) -> Tuple[List[torch.Tensor], TreeNode]: + node = self.root_node + now = time.monotonic() + node.last_access_time = now + values: List[torch.Tensor] = [] + + while len(key) > 0: + ck = _child_key(key, self.page_size) + if ck not in node.children: + break + child = node.children[ck] + child.last_access_time = now + child.hit_count += 1 + plen = _key_match(child.key, key, self.page_size) + if plen < len(child.key): + new_node = self._split_node(child.key, child, plen) + values.append(new_node.value) + node = new_node + break + values.append(child.value) + node = child + key = key[plen:] + + return values, node + + def _match_swa(self, key: RadixKey) -> Tuple[List[torch.Tensor], TreeNode, int]: + """SWA-aware match. Returns *(values, last_node, best_value_count)*. + + ``best_value_count`` is the number of value tensors from *values* + that form a valid SWA-safe prefix (enough non-tombstone tokens within + the sliding window, or a tombstone-free path to root). 
+ """ + node = self.root_node + values: List[torch.Tensor] = [] + non_tomb_len: float = float("inf") + best_count = 0 + best_node = node + + while len(key) > 0: + ck = _child_key(key, self.page_size) + if ck not in node.children: + break + child = node.children[ck] + + if child.swa_tombstone: + if non_tomb_len >= self.sliding_window_size: + best_count = len(values) + best_node = node + non_tomb_len = 0 + + plen = _key_match(child.key, key, self.page_size) + if plen < len(child.key): + new_node = self._split_node(child.key, child, plen) + values.append(new_node.value) + if not new_node.swa_tombstone: + non_tomb_len += len(new_node.value) + node = new_node + break + values.append(child.value) + if not child.swa_tombstone: + non_tomb_len += len(child.value) + node = child + key = key[plen:] + + if non_tomb_len >= self.sliding_window_size: + best_count = len(values) + best_node = node + + return values, best_node, best_count + + def _insert_normal(self, node: TreeNode, key: RadixKey, value: torch.Tensor) -> int: + now = time.monotonic() + node.last_access_time = now + if len(key) == 0: + return 0 + + total_prefix = 0 + while len(key) > 0: + ck = _child_key(key, self.page_size) + if ck not in node.children: + break + node = node.children[ck] + node.last_access_time = now + plen = _key_match(node.key, key, self.page_size) + if plen < len(node.key): + self._split_node(node.key, node, plen) + total_prefix += plen + key = key[plen:] + value = value[plen:] + + if len(key) > 0: + self._add_leaf(node, key, value) + + return total_prefix + + def _insert_swa( + self, + node: TreeNode, + key: RadixKey, + value: torch.Tensor, + prev_prefix_len: int, + swa_evicted_seqlen: int, + ) -> int: + """Insert with SWA tombstone awareness. + + When an existing node is tombstoned and the incoming *value* carries + fresh SWA KV (i.e. beyond *swa_evicted_seqlen*), the node is + un-tombstoned and its value is replaced. 
+ """ + now = time.monotonic() + node.last_access_time = now + if len(key) == 0: + return 0 + + total_prefix = 0 + while len(key) > 0: + ck = _child_key(key, self.page_size) + if ck not in node.children: + break + node = node.children[ck] + node.last_access_time = now + plen = _key_match(node.key, key, self.page_size) + + if plen < len(node.key): + self._split_node(node.key, node, plen) + + beyond_protected = prev_prefix_len < total_prefix + plen + if beyond_protected and node.swa_tombstone: + if swa_evicted_seqlen <= total_prefix: + self._free_indices(node.value[:plen]) + node.value = value[:plen].clone() + node.swa_tombstone = False + self._swa_evictable_size += len(node.value) + else: + self._free_indices(value[:plen]) + elif beyond_protected: + self._free_indices(value[:plen]) + + total_prefix += plen + key = key[plen:] + value = value[plen:] + + if len(key) > 0: + if ( + swa_evicted_seqlen > total_prefix + and swa_evicted_seqlen < total_prefix + len(key) + ): + tomb_len = swa_evicted_seqlen - total_prefix + self._add_leaf( + node, key[:tomb_len], value[:tomb_len], swa_tombstone=True + ) + node = node.children[_child_key(key, self.page_size)] + key = key[tomb_len:] + value = value[tomb_len:] + + if len(key) > 0: + self._add_leaf(node, key, value, swa_tombstone=False) + + return total_prefix + + def _add_leaf( + self, + parent: TreeNode, + key: RadixKey, + value: torch.Tensor, + swa_tombstone: bool = False, + ) -> TreeNode: + new_node = TreeNode() + new_node.parent = parent + new_node.key = key + new_node.value = value.clone() + new_node.swa_tombstone = swa_tombstone + parent.children[_child_key(key, self.page_size)] = new_node + self._evictable_size += len(key) + if self.supports_swa and not swa_tombstone: + self._swa_evictable_size += len(key) + return new_node + + def _split_node(self, key: RadixKey, child: TreeNode, split_len: int) -> TreeNode: + """Split *child* at *split_len*, returning the new parent node.""" + new_node = TreeNode() + 
new_node.children[_child_key(key[split_len:], self.page_size)] = child + new_node.parent = child.parent + new_node.lock_ref = child.lock_ref + new_node.swa_lock_ref = child.swa_lock_ref + new_node.swa_tombstone = child.swa_tombstone + new_node.swa_boundary_id = child.swa_boundary_id + child.swa_boundary_id = None + new_node.key = child.key[:split_len] + new_node.value = child.value[:split_len].clone() + + # Split hash values if they exist + if child.hash_values is not None: + pages = split_len // self.page_size if self.page_size > 1 else split_len + new_node.hash_values = child.hash_values[:pages] + child.hash_values = child.hash_values[pages:] + else: + new_node.hash_values = None + + child.parent = new_node + child.key = child.key[split_len:] + child.value = child.value[split_len:].clone() + new_node.parent.children[_child_key(key, self.page_size)] = new_node + return new_node + + def _delete_leaf(self, node: TreeNode) -> None: + ck = _child_key(node.key, self.page_size) + node.parent.children.pop(ck, None) + self._evictable_size -= len(node.key) + if self.supports_swa and not node.swa_tombstone: + self._swa_evictable_size -= len(node.key) + + def _tombstone_node(self, node: TreeNode) -> None: + node.swa_tombstone = True + self._swa_evictable_size -= len(node.key) + + def _collect_evictable_leaves(self) -> List[TreeNode]: + leaves: List[TreeNode] = [] + stack: List[TreeNode] = [self.root_node] + while stack: + n = stack.pop() + if n.evicted: + continue + has_live_child = False + for c in n.children.values(): + if not c.evicted: + has_live_child = True + stack.append(c) + if not has_live_child and n.lock_ref == 0 and n != self.root_node: + leaves.append(n) + return leaves + + def _collect_swa_evictable(self) -> List[TreeNode]: + nodes: List[TreeNode] = [] + stack: List[TreeNode] = [self.root_node] + while stack: + n = stack.pop() + if n.evicted: + continue + if n != self.root_node and not n.swa_tombstone and n.swa_lock_ref == 0: + nodes.append(n) + stack.extend(c 
for c in n.children.values() if not c.evicted) + return nodes + + def _page_align_key(self, key: RadixKey) -> RadixKey: + if self.page_size == 1: + return key + aligned = len(key) // self.page_size * self.page_size + return key[:aligned] + + def _free_indices(self, indices: torch.Tensor) -> None: + if self.pool is not None and len(indices) > 0: + self.pool.free(indices) + + def _free_swa_indices(self, indices: torch.Tensor) -> None: + if self.pool is not None and len(indices) > 0: + if hasattr(self.pool, "free_swa"): + self.pool.free_swa(indices) + else: + self.pool.free(indices) + + def _print_helper(self, node: TreeNode, indent: int) -> None: + stack = [(node, indent)] + while stack: + n, ind = stack.pop() + toks = n.key.token_ids[:10] if n.key else [] + klen = len(n.key) if n.key else 0 + flags = f"lock={n.lock_ref}" + if self.supports_swa: + flags += f" swa={n.swa_lock_ref} tomb={n.swa_tombstone}" + print(f"{' ' * ind}[{klen}] {toks} {flags}") + for c in n.children.values(): + stack.append((c, ind + 1)) diff --git a/pymllm/orchestrator/async_disk_io_process.py b/pymllm/orchestrator/async_disk_io_process.py index 598d93eb..ef3fd5f0 100644 --- a/pymllm/orchestrator/async_disk_io_process.py +++ b/pymllm/orchestrator/async_disk_io_process.py @@ -1,3 +1,84 @@ +""" +AsyncDiskIoProcess -- optional subprocess for asynchronous disk I/O. + +Handles weight loading, checkpoint saving, or other heavy disk operations +without blocking the scheduler or model runner. +""" + +import logging +from multiprocessing.connection import Connection +from typing import Any, Dict, Optional + +import zmq + +from pymllm.orchestrator.ipc_utils import create_zmq_socket + +logger = logging.getLogger(__name__) + + class AsyncDiskIoProcess: - def __init__(self): + """Runs inside a subprocess. 
Performs disk I/O on behalf of the scheduler.""" + + def __init__(self, recv_addr: str): + self._recv_addr = recv_addr + + self._zmq_ctx: Optional[zmq.Context] = None + self._recv_sock: Optional[zmq.Socket] = None + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def init_sockets(self) -> None: + self._zmq_ctx = zmq.Context() + self._recv_sock = create_zmq_socket( + self._zmq_ctx, zmq.PULL, self._recv_addr, bind=True, + ) + + def event_loop(self) -> None: + """Infinite loop: recv I/O request -> execute -> (optionally reply).""" + logger.info("AsyncDiskIoProcess event loop started") + while True: + io_request: Dict[str, Any] = self._recv_sock.recv_pyobj() + self._handle(io_request) + + # ------------------------------------------------------------------ + # I/O handling (placeholder) + # ------------------------------------------------------------------ + + def _handle(self, io_request: Dict[str, Any]) -> None: + """Dispatch an I/O request. + + TODO: implement weight loading, checkpoint save, etc. 
+ """ + kind = io_request.get("kind") + logger.debug("AsyncDiskIoProcess received request kind=%s", kind) + + # ------------------------------------------------------------------ + # Cleanup + # ------------------------------------------------------------------ + + def shutdown(self) -> None: + if self._recv_sock is not None: + self._recv_sock.close() + if self._zmq_ctx is not None: + self._zmq_ctx.term() + + +def run_async_disk_io_process( + recv_addr: str, + pipe_writer: Connection, +) -> None: + """Entry point for ``torch.multiprocessing.Process(target=...)``.""" + proc = AsyncDiskIoProcess(recv_addr) + proc.init_sockets() + + pipe_writer.send({"status": "ready", "process": "async_disk_io"}) + pipe_writer.close() + + try: + proc.event_loop() + except KeyboardInterrupt: pass + finally: + proc.shutdown() diff --git a/pymllm/orchestrator/detokenizer_process.py b/pymllm/orchestrator/detokenizer_process.py index 47c1c595..e9d5184b 100644 --- a/pymllm/orchestrator/detokenizer_process.py +++ b/pymllm/orchestrator/detokenizer_process.py @@ -1,3 +1,114 @@ +""" +DetokenizerProcess -- subprocess that converts token IDs back to text. + +Receives ``BatchTokenIDOut``-style dicts from the SchedulerProcess, +detokenizes them, and forwards the decoded strings to the +RequestResponseProcess. +""" + +import logging +from multiprocessing.connection import Connection +from typing import Any, Dict, List, Optional + +import zmq + +from pymllm.orchestrator.ipc_utils import create_zmq_socket + +logger = logging.getLogger(__name__) + + class DetokenizerProcess: - def __init__(self): + """Runs inside a subprocess. 
Detokenizes finished outputs.""" + + def __init__( + self, + recv_from_scheduler_addr: str, + send_to_rr_addr: str, + ): + self._recv_from_scheduler_addr = recv_from_scheduler_addr + self._send_to_rr_addr = send_to_rr_addr + + self._zmq_ctx: Optional[zmq.Context] = None + self._recv_from_scheduler: Optional[zmq.Socket] = None + self._send_to_rr: Optional[zmq.Socket] = None + + # TODO: initialise the tokenizer (needed for decode) + self._tokenizer = None + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def init_sockets(self) -> None: + self._zmq_ctx = zmq.Context() + self._recv_from_scheduler = create_zmq_socket( + self._zmq_ctx, + zmq.PULL, + self._recv_from_scheduler_addr, + bind=False, + ) + self._send_to_rr = create_zmq_socket( + self._zmq_ctx, + zmq.PUSH, + self._send_to_rr_addr, + bind=False, + ) + + def event_loop(self) -> None: + """Infinite loop: recv token IDs -> detokenize -> send text to RR.""" + logger.info("DetokenizerProcess event loop started") + while True: + token_id_out = self._recv_from_scheduler.recv_pyobj() + str_out = self._detokenize(token_id_out) + self._send_to_rr.send_pyobj(str_out) + + # ------------------------------------------------------------------ + # Detokenization (placeholder) + # ------------------------------------------------------------------ + + def _detokenize(self, token_id_out: Dict[str, Any]) -> Dict[str, Any]: + """Convert token IDs to text. + + TODO: replace with real tokenizer.decode() call and incremental + detokenization logic. 
+ """ + output_ids: List[int] = token_id_out.get("output_token_ids", []) + # placeholder: join ids as string + text = "" # TODO: self._tokenizer.decode(output_ids) + return { + "rid": token_id_out.get("rid"), + "text": text, + "output_token_ids": output_ids, + } + + # ------------------------------------------------------------------ + # Cleanup + # ------------------------------------------------------------------ + + def shutdown(self) -> None: + if self._recv_from_scheduler is not None: + self._recv_from_scheduler.close() + if self._send_to_rr is not None: + self._send_to_rr.close() + if self._zmq_ctx is not None: + self._zmq_ctx.term() + + +def run_detokenizer_process( + recv_from_scheduler_addr: str, + send_to_rr_addr: str, + pipe_writer: Connection, +) -> None: + """Entry point for ``torch.multiprocessing.Process(target=...)``.""" + proc = DetokenizerProcess(recv_from_scheduler_addr, send_to_rr_addr) + proc.init_sockets() + + pipe_writer.send({"status": "ready", "process": "detokenizer"}) + pipe_writer.close() + + try: + proc.event_loop() + except KeyboardInterrupt: pass + finally: + proc.shutdown() diff --git a/pymllm/orchestrator/ipc_utils.py b/pymllm/orchestrator/ipc_utils.py new file mode 100644 index 00000000..faaf7a6d --- /dev/null +++ b/pymllm/orchestrator/ipc_utils.py @@ -0,0 +1,70 @@ +"""ZMQ IPC utilities for inter-process communication. + +Provides helpers to generate unique IPC addresses and create pre-configured +ZMQ sockets so that every process uses the same conventions. +""" + +import os +import tempfile +from typing import Optional + +import zmq + + +_IPC_DIR = os.path.join(tempfile.gettempdir(), "pymllm_ipc") + + +def _ensure_ipc_dir() -> None: + os.makedirs(_IPC_DIR, exist_ok=True) + + +def make_ipc_address(name: str, unique_id: Optional[str] = None) -> str: + """Return an ``ipc://`` address for *name*, optionally scoped by *unique_id*. + + Parameters + ---------- + name + Logical channel name, e.g. ``"rr_to_tokenizer"``. 
+ unique_id + Per-engine identifier (typically ``str(os.getpid())``) to avoid + collisions when multiple engines run on the same host. + """ + _ensure_ipc_dir() + suffix = f"_{unique_id}" if unique_id else "" + return f"ipc://{_IPC_DIR}/pymllm_{name}{suffix}" + + +def create_zmq_socket( + ctx: zmq.Context, + socket_type: int, + address: str, + bind: bool, +) -> zmq.Socket: + """Create a ZMQ socket, bind or connect it, and return it. + + Parameters + ---------- + ctx + A ``zmq.Context`` shared within the process. + socket_type + One of ``zmq.PUSH``, ``zmq.PULL``, ``zmq.PAIR``, etc. + address + The ``ipc://`` address string. + bind + If ``True`` the socket calls ``bind``; otherwise ``connect``. + """ + sock = ctx.socket(socket_type) + sock.setsockopt(zmq.LINGER, 0) + if bind: + sock.bind(address) + else: + sock.connect(address) + return sock + + +def close_zmq_socket(sock: zmq.Socket) -> None: + """Close a ZMQ socket, ignoring errors.""" + try: + sock.close() + except zmq.ZMQError: + pass diff --git a/pymllm/orchestrator/model_runner_process.py b/pymllm/orchestrator/model_runner_process.py index 45091b59..4b28645e 100644 --- a/pymllm/orchestrator/model_runner_process.py +++ b/pymllm/orchestrator/model_runner_process.py @@ -1,3 +1,114 @@ +""" +ModelRunnerProcess -- subprocess that executes model forward passes. + +Receives batches from the SchedulerProcess, runs the model forward + sampling, +and returns the results (logits, next_token_ids) back to the scheduler. +""" + +import logging +from multiprocessing.connection import Connection +from typing import Any, Dict, Optional + +import zmq + +from pymllm.orchestrator.ipc_utils import create_zmq_socket + +logger = logging.getLogger(__name__) + + class ModelRunnerProcess: - def __init__(self): + """Runs inside a subprocess. 
Owns the model and performs forward passes.""" + + def __init__( + self, + recv_from_scheduler_addr: str, + send_to_scheduler_addr: str, + ): + self._recv_from_scheduler_addr = recv_from_scheduler_addr + self._send_to_scheduler_addr = send_to_scheduler_addr + + self._zmq_ctx: Optional[zmq.Context] = None + self._recv_from_scheduler: Optional[zmq.Socket] = None + self._send_to_scheduler: Optional[zmq.Socket] = None + + # TODO: initialise model, attention backend, memory pool, etc. + self._model = None + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def init_sockets(self) -> None: + self._zmq_ctx = zmq.Context() + self._recv_from_scheduler = create_zmq_socket( + self._zmq_ctx, zmq.PULL, self._recv_from_scheduler_addr, bind=False, + ) + self._send_to_scheduler = create_zmq_socket( + self._zmq_ctx, zmq.PUSH, self._send_to_scheduler_addr, bind=False, + ) + + def event_loop(self) -> None: + """Infinite loop: recv batch -> forward -> sample -> send result.""" + logger.info("ModelRunnerProcess event loop started") + while True: + batch = self._recv_from_scheduler.recv_pyobj() + result = self._forward_batch(batch) + self._send_to_scheduler.send_pyobj(result) + + # ------------------------------------------------------------------ + # Forward pass (placeholder) + # ------------------------------------------------------------------ + + def _forward_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]: + """Run the model forward pass and sampling for *batch*. + + TODO: implement real forward pass, logits processing, and sampling. + """ + requests = batch.get("requests", []) + finished = [] + unfinished = [] + + for req in requests: + # TODO: actual model forward, logits -> next_token_ids + next_token_ids = [] # placeholder + req["output_token_ids"] = req.get("output_token_ids", []) + next_token_ids + # TODO: check EOS / max_tokens to decide finished vs. 
unfinished + finished.append(req) + + return { + "batch_id": batch.get("batch_id"), + "finished": finished, + "unfinished": unfinished, + } + + # ------------------------------------------------------------------ + # Cleanup + # ------------------------------------------------------------------ + + def shutdown(self) -> None: + if self._recv_from_scheduler is not None: + self._recv_from_scheduler.close() + if self._send_to_scheduler is not None: + self._send_to_scheduler.close() + if self._zmq_ctx is not None: + self._zmq_ctx.term() + + +def run_model_runner_process( + recv_from_scheduler_addr: str, + send_to_scheduler_addr: str, + pipe_writer: Connection, +) -> None: + """Entry point for ``torch.multiprocessing.Process(target=...)``.""" + proc = ModelRunnerProcess(recv_from_scheduler_addr, send_to_scheduler_addr) + proc.init_sockets() + + pipe_writer.send({"status": "ready", "process": "model_runner"}) + pipe_writer.close() + + try: + proc.event_loop() + except KeyboardInterrupt: pass + finally: + proc.shutdown() diff --git a/pymllm/orchestrator/request_response_process.py b/pymllm/orchestrator/request_response_process.py index 998c2655..74335428 100644 --- a/pymllm/orchestrator/request_response_process.py +++ b/pymllm/orchestrator/request_response_process.py @@ -1,10 +1,150 @@ """ -This module contains the request and response threads for the orchestrator. +RequestResponseProcess -- the main-process entry point for user requests. -NOTE: This RR(request and response) threads can only be used as the main thread of the orchestrator. +This process is **not** a subprocess; it lives in the engine's main process. +Incoming requests are placed into an ``asyncio.Queue`` and forwarded to the +TokenizerProcess via ZMQ. Decoded results arrive back from the +DetokenizerProcess and are dispatched to the waiting callers. 
+ +The request-tracking model uses ``ReqState`` pattern: each request +gets an ``asyncio.Event`` + output list so that streaming (multiple incremental +chunks) and one-shot responses are both supported. """ +import asyncio +import dataclasses +import logging +from typing import Any, Dict, List, Optional + +import zmq +import zmq.asyncio + +from pymllm.engine.io_struct import GenerateReqInput +from pymllm.orchestrator.ipc_utils import create_zmq_socket, close_zmq_socket + +logger = logging.getLogger(__name__) + + +@dataclasses.dataclass +class ReqState: + """Per-request state that supports both streaming and one-shot responses. + + ``ReqState`` (Event + out_list). + + The recv loop appends results to *out_list* and signals *event*; + callers ``await event.wait()`` in a loop, consuming results until + *finished* is ``True``. + """ + + out_list: List[Dict[str, Any]] = dataclasses.field(default_factory=list) + finished: bool = False + event: asyncio.Event = dataclasses.field(default_factory=asyncio.Event) + class RequestResponseProcess: - def __init__(self): - pass + """Sits in the main process; bridges user-facing API and subprocess pipeline.""" + + def __init__( + self, + send_to_tokenizer_addr: str, + recv_from_detokenizer_addr: str, + ): + self._send_to_tokenizer_addr: str = send_to_tokenizer_addr + self._recv_from_detokenizer_addr: str = recv_from_detokenizer_addr + + # asyncio queue that buffers incoming user requests + self._request_queue: asyncio.Queue[Dict[str, Any]] = asyncio.Queue() + + # rid -> ReqState (replaces the old rid -> Future dict) + self._rid_to_state: Dict[str, ReqState] = {} + + # ZMQ (async context, sockets created lazily in the event loop) + self._zmq_ctx: Optional[zmq.asyncio.Context] = None + self._send_to_tokenizer: Optional[zmq.asyncio.Socket] = None + self._recv_from_detokenizer: Optional[zmq.asyncio.Socket] = None + + self._loop_task: Optional[asyncio.Task] = None + + def start(self, loop: asyncio.AbstractEventLoop) -> None: + """Kick 
off the background send/recv tasks on *loop*.""" + self._zmq_ctx = zmq.asyncio.Context() + self._send_to_tokenizer = create_zmq_socket( + self._zmq_ctx, + zmq.PUSH, + self._send_to_tokenizer_addr, + bind=True, + ) + self._recv_from_detokenizer = create_zmq_socket( + self._zmq_ctx, + zmq.PULL, + self._recv_from_detokenizer_addr, + bind=True, + ) + self._loop_task = loop.create_task(self._run()) + + async def add_request(self, request: GenerateReqInput) -> ReqState: + """Enqueue a request and return its :class:`ReqState`. + + Callers should ``await state.event.wait()`` in a loop, consuming + ``state.out_list`` entries until ``state.finished`` is ``True``. + """ + if not isinstance(request.rid, str): + raise ValueError("RequestResponseProcess currently accepts single requests only.") + rid = request.rid + state = ReqState() + self._rid_to_state[rid] = state + await self._request_queue.put(request.to_request_dict()) + return state + + def remove_state(self, rid: str) -> None: + """Remove the ``ReqState`` for *rid* (called by the caller once done).""" + self._rid_to_state.pop(rid, None) + + async def abort_request(self, rid: str) -> None: + """Cancel a pending request and notify downstream processes.""" + state = self._rid_to_state.pop(rid, None) + if state is not None and not state.finished: + state.finished = True + state.out_list.append({"rid": rid, "error": "aborted", "finished": True}) + state.event.set() + await self._send_to_tokenizer.send_pyobj({"rid": rid, "abort": True}) + + async def shutdown(self) -> None: + if self._loop_task is not None: + self._loop_task.cancel() + if self._send_to_tokenizer is not None: + close_zmq_socket(self._send_to_tokenizer) + if self._recv_from_detokenizer is not None: + close_zmq_socket(self._recv_from_detokenizer) + if self._zmq_ctx is not None: + self._zmq_ctx.term() + + # ------------------------------------------------------------------ + # Internal loops + # ------------------------------------------------------------------ + 
+ async def _run(self) -> None: + """Main loop: forward requests to tokenizer, receive results from detokenizer.""" + send_task = asyncio.create_task(self._send_loop()) + recv_task = asyncio.create_task(self._recv_loop()) + await asyncio.gather(send_task, recv_task) + + async def _send_loop(self) -> None: + """Drain the asyncio queue and push requests to the TokenizerProcess.""" + while True: + request = await self._request_queue.get() + await self._send_to_tokenizer.send_pyobj(request) + + async def _recv_loop(self) -> None: + """Receive decoded results from DetokenizerProcess and dispatch to ReqStates.""" + while True: + result = await self._recv_from_detokenizer.recv_pyobj() + rid = result.get("rid") + state = self._rid_to_state.get(rid) + if state is None: + logger.warning("Received result for unknown rid=%s", rid) + continue + state.out_list.append(result) + if result.get("finished", False): + state.finished = True + state.event.set() diff --git a/pymllm/orchestrator/scheduler_process.py b/pymllm/orchestrator/scheduler_process.py index 7a7783d5..e7394dab 100644 --- a/pymllm/orchestrator/scheduler_process.py +++ b/pymllm/orchestrator/scheduler_process.py @@ -1,3 +1,248 @@ +""" +SchedulerProcess -- the central scheduling hub. + +Receives tokenized requests from the TokenizerProcess, organises them into +batches, dispatches batches to the ModelRunnerProcess for forward passes, +collects results, and streams finished token IDs to the DetokenizerProcess. 
+ +The main ``event_loop`` scheduler flow:: + + while True: + recv_requests() + process_input_requests() + batch = get_next_batch_to_run() + if batch: + run_batch(batch) + process_batch_result(batch) + stream_output() +""" + +import logging +import time +from collections import deque +from multiprocessing.connection import Connection +from typing import Any, Deque, Dict, List, Optional + +import zmq + +from pymllm.orchestrator.ipc_utils import create_zmq_socket + +logger = logging.getLogger(__name__) + + class SchedulerProcess: - def __init__(self): + """Runs inside a subprocess. Central hub that drives the inference loop.""" + + def __init__( + self, + recv_from_tokenizer_addr: str, + send_to_model_runner_addr: str, + recv_from_model_runner_addr: str, + send_to_detokenizer_addr: str, + ): + # ZMQ addresses + self._recv_from_tokenizer_addr = recv_from_tokenizer_addr + self._send_to_model_runner_addr = send_to_model_runner_addr + self._recv_from_model_runner_addr = recv_from_model_runner_addr + self._send_to_detokenizer_addr = send_to_detokenizer_addr + + # ZMQ runtime objects (initialised in init_sockets) + self._zmq_ctx: Optional[zmq.Context] = None + self._recv_from_tokenizer: Optional[zmq.Socket] = None + self._send_to_model_runner: Optional[zmq.Socket] = None + self._recv_from_model_runner: Optional[zmq.Socket] = None + self._send_to_detokenizer: Optional[zmq.Socket] = None + self._poller: Optional[zmq.Poller] = None + + # Request management + self._waiting_queue: Deque[Dict[str, Any]] = deque() + self._running_batch: Optional[Dict[str, Any]] = None + self._finished: List[Dict[str, Any]] = [] + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def init_sockets(self) -> None: + self._zmq_ctx = zmq.Context() + + self._recv_from_tokenizer = create_zmq_socket( + self._zmq_ctx, + zmq.PULL, + self._recv_from_tokenizer_addr, + bind=False, + ) + 
self._send_to_model_runner = create_zmq_socket( + self._zmq_ctx, + zmq.PUSH, + self._send_to_model_runner_addr, + bind=True, + ) + self._recv_from_model_runner = create_zmq_socket( + self._zmq_ctx, + zmq.PULL, + self._recv_from_model_runner_addr, + bind=True, + ) + self._send_to_detokenizer = create_zmq_socket( + self._zmq_ctx, + zmq.PUSH, + self._send_to_detokenizer_addr, + bind=True, + ) + + # Poller for non-blocking recv from tokenizer + self._poller = zmq.Poller() + self._poller.register(self._recv_from_tokenizer, zmq.POLLIN) + + def event_loop(self) -> None: + """Infinite scheduling loop.""" + logger.info("SchedulerProcess event loop started") + while True: + self.recv_requests() + self.process_input_requests() + batch = self.get_next_batch_to_run() + if batch is not None: + result = self.run_batch(batch) + self.process_batch_result(batch, result) + self.stream_output() + + # ------------------------------------------------------------------ + # Step 1: receive tokenized requests (non-blocking) + # ------------------------------------------------------------------ + + def recv_requests(self) -> None: + """Non-blocking receive of tokenized requests from TokenizerProcess. + + Uses ``zmq.Poller`` with a short timeout so the scheduler is never + stuck waiting when there are batches to run. + """ + while True: + events = dict(self._poller.poll(timeout=0)) # non-blocking + if self._recv_from_tokenizer not in events: + break + req = self._recv_from_tokenizer.recv_pyobj(zmq.NOBLOCK) + self._waiting_queue.append(req) + + # ------------------------------------------------------------------ + # Step 2: process input requests + # ------------------------------------------------------------------ + + def process_input_requests(self) -> None: + """Pre-process and validate requests sitting in ``_waiting_queue``. + + TODO: attach sampling params, allocate KV-cache slots, etc. 
+ """ pass + + # ------------------------------------------------------------------ + # Step 3: build the next batch + # ------------------------------------------------------------------ + + def get_next_batch_to_run(self) -> Optional[Dict[str, Any]]: + """Select requests from ``_waiting_queue`` and form a batch. + + TODO: implement real batching / scheduling policy. + """ + if not self._waiting_queue: + return None + + batch_requests: List[Dict[str, Any]] = [] + # TODO: respect max_running_requests, memory budget, etc. + while self._waiting_queue: + batch_requests.append(self._waiting_queue.popleft()) + + batch = { + "requests": batch_requests, + "batch_id": id(batch_requests), + "created_at": time.time(), + } + return batch + + # ------------------------------------------------------------------ + # Step 4: run the batch via ModelRunnerProcess + # ------------------------------------------------------------------ + + def run_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]: + """Send *batch* to ModelRunnerProcess and wait for the result. + + This is a **blocking** call: the scheduler is synchronous with the + model runner for simplicity. Overlap scheduling can be added later. + """ + self._send_to_model_runner.send_pyobj(batch) + result = self._recv_from_model_runner.recv_pyobj() + return result + + # ------------------------------------------------------------------ + # Step 5: process batch result + # ------------------------------------------------------------------ + + def process_batch_result( + self, batch: Dict[str, Any], result: Dict[str, Any] + ) -> None: + """Handle the result returned by the ModelRunnerProcess. + + TODO: check completion status (EOS, max_tokens), manage KV-cache, + split finished vs. unfinished requests. 
+ """ + finished_requests = result.get("finished", []) + unfinished_requests = result.get("unfinished", []) + + self._finished.extend(finished_requests) + + # Put unfinished requests back for the next iteration + for req in unfinished_requests: + self._waiting_queue.appendleft(req) + + # ------------------------------------------------------------------ + # Step 6: stream output to DetokenizerProcess + # ------------------------------------------------------------------ + + def stream_output(self) -> None: + """Send finished token-ID outputs to the DetokenizerProcess.""" + while self._finished: + item = self._finished.pop(0) + self._send_to_detokenizer.send_pyobj(item) + + # ------------------------------------------------------------------ + # Cleanup + # ------------------------------------------------------------------ + + def shutdown(self) -> None: + for sock in ( + self._recv_from_tokenizer, + self._send_to_model_runner, + self._recv_from_model_runner, + self._send_to_detokenizer, + ): + if sock is not None: + sock.close() + if self._zmq_ctx is not None: + self._zmq_ctx.term() + + +def run_scheduler_process( + recv_from_tokenizer_addr: str, + send_to_model_runner_addr: str, + recv_from_model_runner_addr: str, + send_to_detokenizer_addr: str, + pipe_writer: Connection, +) -> None: + """Entry point for ``torch.multiprocessing.Process(target=...)``.""" + proc = SchedulerProcess( + recv_from_tokenizer_addr, + send_to_model_runner_addr, + recv_from_model_runner_addr, + send_to_detokenizer_addr, + ) + proc.init_sockets() + + pipe_writer.send({"status": "ready", "process": "scheduler"}) + pipe_writer.close() + + try: + proc.event_loop() + except KeyboardInterrupt: + pass + finally: + proc.shutdown() diff --git a/pymllm/orchestrator/tokenizer_process.py b/pymllm/orchestrator/tokenizer_process.py index 0dca2155..852fac11 100644 --- a/pymllm/orchestrator/tokenizer_process.py +++ b/pymllm/orchestrator/tokenizer_process.py @@ -1,3 +1,102 @@ +""" +TokenizerProcess -- 
subprocess that tokenizes incoming raw requests. + +Receives raw requests from RequestResponseProcess via ZMQ, tokenizes them, +and forwards the tokenized payloads to the SchedulerProcess. +""" + +import logging +from multiprocessing.connection import Connection +from typing import Any, Dict, List + +import zmq + +from pymllm.orchestrator.ipc_utils import create_zmq_socket + +logger = logging.getLogger(__name__) + + class TokenizerProcess: - def __init__(self): + """Runs inside a subprocess spawned by ``torch.multiprocessing``.""" + + def __init__( + self, + recv_from_rr_addr: str, + send_to_scheduler_addr: str, + ): + self._recv_from_rr_addr = recv_from_rr_addr + self._send_to_scheduler_addr = send_to_scheduler_addr + + self._zmq_ctx: zmq.Context = None + self._recv_from_rr: zmq.Socket = None + self._send_to_scheduler: zmq.Socket = None + + # TODO: initialise the actual tokenizer (HuggingFace / custom) + self._tokenizer = None + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def init_sockets(self) -> None: + self._zmq_ctx = zmq.Context() + self._recv_from_rr = create_zmq_socket( + self._zmq_ctx, zmq.PULL, self._recv_from_rr_addr, bind=False, + ) + self._send_to_scheduler = create_zmq_socket( + self._zmq_ctx, zmq.PUSH, self._send_to_scheduler_addr, bind=True, + ) + + def event_loop(self) -> None: + """Infinite loop: recv raw request -> tokenize -> send to scheduler.""" + logger.info("TokenizerProcess event loop started") + while True: + raw_request: Dict[str, Any] = self._recv_from_rr.recv_pyobj() + tokenized = self._tokenize(raw_request) + self._send_to_scheduler.send_pyobj(tokenized) + + # ------------------------------------------------------------------ + # Tokenization (placeholder) + # ------------------------------------------------------------------ + + def _tokenize(self, raw_request: Dict[str, Any]) -> Dict[str, Any]: + """Tokenize a single raw 
request and return the tokenized payload. + + TODO: replace with real tokenizer call. + """ + text = raw_request.get("text", "") + # placeholder: produce fake token ids + input_ids: List[int] = [] # TODO: self._tokenizer.encode(text) + return { + **raw_request, + "input_ids": input_ids, + } + + def shutdown(self) -> None: + if self._recv_from_rr is not None: + self._recv_from_rr.close() + if self._send_to_scheduler is not None: + self._send_to_scheduler.close() + if self._zmq_ctx is not None: + self._zmq_ctx.term() + + +def run_tokenizer_process( + recv_from_rr_addr: str, + send_to_scheduler_addr: str, + pipe_writer: Connection, +) -> None: + """Entry point for ``torch.multiprocessing.Process(target=...)``.""" + proc = TokenizerProcess(recv_from_rr_addr, send_to_scheduler_addr) + proc.init_sockets() + + # Signal readiness to the parent process + pipe_writer.send({"status": "ready", "process": "tokenizer"}) + pipe_writer.close() + + try: + proc.event_loop() + except KeyboardInterrupt: pass + finally: + proc.shutdown() From e5e1b789fe249c229e51a90cb5e1ea888bbbdd32 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Sat, 21 Feb 2026 15:05:58 +0000 Subject: [PATCH 09/13] refactor: improve socket initialization in TokenizerProcess - Enhanced readability by formatting socket creation parameters across multiple lines in the `init_sockets` method of `TokenizerProcess`. - Maintained functionality while improving code clarity for future maintenance. 
--- pymllm/orchestrator/tokenizer_process.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pymllm/orchestrator/tokenizer_process.py b/pymllm/orchestrator/tokenizer_process.py index 852fac11..53714bb6 100644 --- a/pymllm/orchestrator/tokenizer_process.py +++ b/pymllm/orchestrator/tokenizer_process.py @@ -41,10 +41,16 @@ def __init__( def init_sockets(self) -> None: self._zmq_ctx = zmq.Context() self._recv_from_rr = create_zmq_socket( - self._zmq_ctx, zmq.PULL, self._recv_from_rr_addr, bind=False, + self._zmq_ctx, + zmq.PULL, + self._recv_from_rr_addr, + bind=False, ) self._send_to_scheduler = create_zmq_socket( - self._zmq_ctx, zmq.PUSH, self._send_to_scheduler_addr, bind=True, + self._zmq_ctx, + zmq.PUSH, + self._send_to_scheduler_addr, + bind=True, ) def event_loop(self) -> None: From 65f00b4eda03ec1d86db16ae8d89b17fd4684e86 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Fri, 27 Feb 2026 11:23:17 +0000 Subject: [PATCH 10/13] feat(engine): support batch generation and enable shared memory queue for IPC - Add enable_shared_queue config option to server configuration - Implement shared memory queue for fast IPC between tokenizer and scheduler - Refactor Engine.generate and generate_async to support single and batch requests - Add colorful ASCII art banner on engine startup if dependencies are available - Add _make_rids utility to auto-generate request IDs for batch and single requests - Implement TokenizedGenerateReqInput with multimodal inputs support - Refactor RequestResponseProcess to handle batch requests and return list of ReqStates - Enhance SchedulerProcess to receive requests from shared queue or legacy ZMQ - Introduce SharedMemoryManager for managing metadata in shared memory segments - Create TensorQueue to support fast IPC of tensors via shared memory and queues - Add CUDA IPC Transport module for zero-copy GPU tensor sharing with workspace buffer - Refactor ModelRunnerProcess to handle batch requests 
with actual output structure placeholders - Improve resource management and error handling in shared memory IPC utilities --- pymllm/configs/server_config.py | 1 + .../normal.py => engine/__init__.py} | 0 pymllm/engine/forward_batch.py | 0 pymllm/engine/io_struct.py | 9 + pymllm/engine/launch.py | 181 +++++++-- pymllm/layers/attention/attention_backend.py | 0 pymllm/layers/attention/flashinfer_backend.py | 0 pymllm/layers/attention/radix_attention.py | 0 pymllm/orchestrator/cuda_ipc_transport.py | 373 ++++++++++++++++++ pymllm/orchestrator/model_runner_process.py | 49 ++- .../orchestrator/request_response_process.py | 40 +- pymllm/orchestrator/scheduler_process.py | 97 ++++- pymllm/orchestrator/shared_memory_queue.py | 190 +++++++++ pymllm/orchestrator/tokenizer_process.py | 311 ++++++++++++++- pyproject.toml | 2 + 15 files changed, 1184 insertions(+), 69 deletions(-) rename pymllm/{layers/attention/normal.py => engine/__init__.py} (100%) create mode 100644 pymllm/engine/forward_batch.py create mode 100644 pymllm/layers/attention/attention_backend.py create mode 100644 pymllm/layers/attention/flashinfer_backend.py create mode 100644 pymllm/layers/attention/radix_attention.py create mode 100644 pymllm/orchestrator/cuda_ipc_transport.py create mode 100644 pymllm/orchestrator/shared_memory_queue.py diff --git a/pymllm/configs/server_config.py b/pymllm/configs/server_config.py index 7cda9c3b..9e399d62 100644 --- a/pymllm/configs/server_config.py +++ b/pymllm/configs/server_config.py @@ -78,6 +78,7 @@ class ServerConfig: # --------------------------------------------------------------------- # # Feature switches # --------------------------------------------------------------------- # + enable_shared_queue: bool = False # Use shared memory queue for fast IPC # enable_lora: bool = False # max_loaded_loras: Optional[int] = None # max_loras_per_batch: int = 8 diff --git a/pymllm/layers/attention/normal.py b/pymllm/engine/__init__.py similarity index 100% rename from 
pymllm/layers/attention/normal.py rename to pymllm/engine/__init__.py diff --git a/pymllm/engine/forward_batch.py b/pymllm/engine/forward_batch.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/engine/io_struct.py b/pymllm/engine/io_struct.py index 777186e2..06c8d78d 100644 --- a/pymllm/engine/io_struct.py +++ b/pymllm/engine/io_struct.py @@ -135,8 +135,17 @@ def to_request_dict(self) -> Dict[str, Any]: @dataclass class TokenizedGenerateReqInput(BaseReq): + # The decoded text passed to the tokenizer (empty string if only input_ids + # were provided by the caller). input_text: str = "" + # Token IDs produced by the tokenizer. input_ids: List[int] = field(default_factory=list) + # Multimodal inputs (processor output, e.g. pixel_values, or raw image / + # audio / video data when no processor is available). ``None`` means the + # request is text-only. + mm_inputs: Optional[Dict[str, Any]] = None + # Raw sampling parameters dict (parsed into a SamplingParams object by the + # model runner when needed). 
sampling_params: Dict[str, Any] = field(default_factory=dict) stream: bool = False return_logprob: bool = False diff --git a/pymllm/engine/launch.py b/pymllm/engine/launch.py index edad97af..2200d7f3 100644 --- a/pymllm/engine/launch.py +++ b/pymllm/engine/launch.py @@ -11,6 +11,14 @@ from transformers import AutoConfig from huggingface_hub import snapshot_download +try: + from pyfiglet import figlet_format + from termcolor import colored + + HAS_BANNER_LIBS = True +except ImportError: + HAS_BANNER_LIBS = False + from pymllm.configs import get_global_config from pymllm.engine.io_struct import GenerateReqInput from pymllm.orchestrator.ipc_utils import make_ipc_address @@ -18,6 +26,7 @@ ReqState, RequestResponseProcess, ) +from pymllm.orchestrator.shared_memory_queue import TensorQueue from pymllm.orchestrator.tokenizer_process import run_tokenizer_process from pymllm.orchestrator.scheduler_process import run_scheduler_process from pymllm.orchestrator.model_runner_process import run_model_runner_process @@ -68,6 +77,26 @@ def _launch_processes(self) -> None: # Record all subprocesses procs_and_readers: List[tuple] = [] + # Config dict for the tokenizer subprocess (must be picklable). + cfg = get_global_config() + enable_shared_queue = cfg.server.enable_shared_queue + + # Create shared queue if enabled + shared_queue = None + if enable_shared_queue: + # TODO: WCH init CUDA IPC things. 
+ shared_queue = TensorQueue(maxsize=1000) # Configurable max size + logger.info("Shared memory queue enabled for fast IPC") + + tokenizer_cfg: Dict[str, Any] = { + "tokenizer_path": str(cfg.server.tokenizer_path), + "tokenizer_mode": cfg.server.tokenizer_mode, + "trust_remote_code": cfg.server.trust_remote_code, + "context_length": cfg.server.context_length, + "hf_config": cfg.model.hf_config, + "enable_shared_queue": enable_shared_queue, + } + # Tokenizer tokenizer_reader, tokenizer_writer = mp.Pipe(duplex=False) tokenizer_proc = mp.Process( @@ -76,6 +105,8 @@ def _launch_processes(self) -> None: addr_request_response_to_tokenizer, addr_tokenizer_to_scheduler, tokenizer_writer, + tokenizer_cfg, + shared_queue, # Pass shared queue ), daemon=True, ) @@ -91,6 +122,8 @@ def _launch_processes(self) -> None: addr_model_runner_to_scheduler, addr_scheduler_to_detokenizer, scheduler_writer, + shared_queue, # Pass shared queue + enable_shared_queue, # Pass flag ), daemon=True, ) @@ -165,6 +198,29 @@ def _launch_processes(self) -> None: self._rr_process.start(self._loop) logger.info("RequestResponseProcess started in main process") + # Print colorful gradient ASCII art banner + if HAS_BANNER_LIBS: + try: + text = figlet_format("pymllm", font="slant") + fired_up = figlet_format("FIRED UP!", font="slant") + + # Apply blue-purple gradient + lines = text.strip().split("\n") + colors_cycle = ["blue", "cyan", "blue", "magenta", "magenta"] + for i, line in enumerate(lines): + color = colors_cycle[i % len(colors_cycle)] + print(colored(line, color, attrs=["bold"])) + + # Print "FIRED UP!" in bright magenta + for line in fired_up.strip().split("\n"): + print(colored(line, "magenta", attrs=["bold"])) + print() + except Exception as e: + logger.debug(f"Failed to print banner: {e}") + print("🚀 pymllm FIRED UP! 🚀\n") + else: + print("🚀 pymllm FIRED UP! 
🚀\n") + def generate( self, prompt: Optional[Union[List[str], str]] = None, @@ -181,10 +237,14 @@ def generate( stream: bool = False, rid: Optional[Union[List[str], str]] = None, **kwargs, - ) -> Dict[str, Any]: - """Synchronous, non-streaming generation entry point.""" - if rid is None: - rid = uuid.uuid4().hex + ) -> Union[Dict[str, Any], List[Dict[str, Any]]]: + """Synchronous, non-streaming generation entry point. + + Accepts a single prompt (``str``) or a batch (``List[str]``). Returns a + single result dict for single inputs and a list of result dicts for batch + inputs, preserving the input order. + """ + rid = self._make_rids(rid, prompt, input_ids) request = GenerateReqInput( rid=rid, text=prompt, @@ -203,11 +263,18 @@ def generate( ) request.normalize_batch_and_arguments() - async def _run() -> Dict[str, Any]: - state = await self._rr_process.add_request(request) - if isinstance(rid, list): - raise ValueError("Synchronous `generate` currently supports single request.") - return await self._wait_for_final_result(rid, state) + async def _run() -> Union[Dict[str, Any], List[Dict[str, Any]]]: + result = await self._rr_process.add_request(request) + if request.is_single: + single_rid = rid if isinstance(rid, str) else rid[0] + return await self._wait_for_final_result(single_rid, result) # type: ignore[arg-type] + # Batch: wait for every sub-request concurrently. + rids_list: List[str] = rid if isinstance(rid, list) else [rid] # type: ignore[assignment] + states: List[ReqState] = result # type: ignore[assignment] + outputs = await asyncio.gather( + *(self._wait_for_final_result(r, s) for r, s in zip(rids_list, states)) + ) + return list(outputs) return self._loop.run_until_complete(_run()) @@ -230,13 +297,14 @@ async def generate_async( ) -> AsyncIterator[Dict[str, Any]]: """Asynchronous generation entry point. - When *stream* is ``False`` (default) the returned async iterator - yields a **single** final result dict. 
When *stream* is ``True`` - every incremental chunk from the detokenizer is yielded as it - arrives, following the ``Event + out_list`` pattern. + For a **single** request and ``stream=False`` yields one final result + dict; with ``stream=True`` yields incremental chunks. + + For a **batch** request the iterator yields the final result for each + sub-request as it completes (order not guaranteed); streaming mode yields + incremental chunks from all sub-requests interleaved. """ - if rid is None: - rid = uuid.uuid4().hex + rid = self._make_rids(rid, prompt, input_ids) request = GenerateReqInput( rid=rid, text=prompt, @@ -254,18 +322,55 @@ async def generate_async( extra_options=kwargs, ) request.normalize_batch_and_arguments() - state = await self._rr_process.add_request(request) + result = await self._rr_process.add_request(request) - try: - if isinstance(rid, list): - raise ValueError("`generate_async` currently supports single request only.") - if stream: - async for chunk in self._stream_results(rid, state): - yield chunk - else: - yield await self._wait_for_final_result(rid, state) - finally: - self._rr_process.remove_state(rid) + if request.is_single: + single_rid = rid if isinstance(rid, str) else rid[0] # type: ignore[index] + state: ReqState = result # type: ignore[assignment] + try: + if stream: + async for chunk in self._stream_results(single_rid, state): + yield chunk + else: + yield await self._wait_for_final_result(single_rid, state) + finally: + self._rr_process.remove_state(single_rid) + else: + rids_list: List[str] = rid if isinstance(rid, list) else [rid] # type: ignore[assignment] + states: List[ReqState] = result # type: ignore[assignment] + try: + if stream: + # Merge streams from all sub-requests using an asyncio queue. 
+ queue: asyncio.Queue = asyncio.Queue() + + async def _forward(r: str, s: ReqState) -> None: + async for chunk in self._stream_results(r, s): + await queue.put(chunk) + await queue.put(None) # sentinel + + tasks = [ + asyncio.create_task(_forward(r, s)) + for r, s in zip(rids_list, states) + ] + done_count = 0 + while done_count < len(tasks): + item = await queue.get() + if item is None: + done_count += 1 + else: + yield item + await asyncio.gather(*tasks) + else: + for coro in asyncio.as_completed( + [ + self._wait_for_final_result(r, s) + for r, s in zip(rids_list, states) + ] + ): + yield await coro + finally: + for r in rids_list: + self._rr_process.remove_state(r) @staticmethod async def _wait_for_final_result(rid: str, state: ReqState) -> Dict[str, Any]: @@ -290,6 +395,30 @@ async def _stream_results( return state.event.clear() + @staticmethod + def _make_rids( + rid: Optional[Union[str, List[str]]], + prompt: Optional[Union[str, List[str]]], + input_ids: Optional[Union[List[int], List[List[int]]]], + ) -> Union[str, List[str]]: + """Return rids, auto-generating UUIDs when *rid* is ``None``. + + The helper infers whether the call is a batch from *prompt* / *input_ids* + so callers don't have to handle this case themselves. + """ + if rid is not None: + return rid + # Determine batch size from the text/input_ids argument. 
+ is_batch = isinstance(prompt, list) or ( + isinstance(input_ids, list) + and len(input_ids) > 0 + and isinstance(input_ids[0], list) + ) + if is_batch: + n = len(prompt) if prompt is not None else len(input_ids) # type: ignore[arg-type] + return [uuid.uuid4().hex for _ in range(n)] + return uuid.uuid4().hex + def shutdown(self) -> None: """Terminate all subprocesses.""" if self._rr_process is not None: diff --git a/pymllm/layers/attention/attention_backend.py b/pymllm/layers/attention/attention_backend.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/attention/flashinfer_backend.py b/pymllm/layers/attention/flashinfer_backend.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/layers/attention/radix_attention.py b/pymllm/layers/attention/radix_attention.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/orchestrator/cuda_ipc_transport.py b/pymllm/orchestrator/cuda_ipc_transport.py new file mode 100644 index 00000000..7052f0e8 --- /dev/null +++ b/pymllm/orchestrator/cuda_ipc_transport.py @@ -0,0 +1,373 @@ +""" +CUDA IPC Transport for zero-copy tensor sharing between processes. + +This module implements CUDA IPC with workspace buffer management +to avoid PyTorch's memory leak issue when sharing IPC handles. + +1. Create a workspace buffer on GPU (pre-allocated memory pool) +2. Copy tensor data to a chunk in the workspace +3. Get CUDA IPC handle for the chunk +4. Send handle + metadata (shape, dtype, offset) to another process +5. Reconstruct tensor in target process from IPC handle +6. Copy to local tensor and mark chunk as reusable + +Key Problem Solved: + PyTorch never releases tensors whose IPC handles are shared until process ends. + Solution: Use a fixed-size workspace buffer and recycle chunks. 
+""" + +import logging +import struct +import uuid +from dataclasses import dataclass +from multiprocessing import Queue +from multiprocessing.shared_memory import SharedMemory +from typing import Any, Dict, List, Optional, Tuple + +import torch +import torch.cuda as cuda + +logger = logging.getLogger(__name__) + + +@dataclass +class MemoryChunk: + """Represents a chunk in the workspace buffer.""" + + offset: int # Offset in bytes from workspace start + size: int # Size in bytes + in_use: bool # Whether the chunk is currently occupied + sync_shm_name: Optional[str] = None # Shared memory name for sync flag + + +class WorkspaceBuffer: + """GPU memory pool for storing multimodal tensors temporarily. + + This prevents the PyTorch IPC handle memory leak by using a fixed-size + pre-allocated buffer and recycling chunks. + """ + + def __init__(self, size_gb: float = 4.0, device: int = 0): + """Initialize workspace buffer. + + Args: + size_gb: Total size of workspace in GB + device: CUDA device ID + """ + self.device = device + self.total_size = int(size_gb * 1024 * 1024 * 1024) # Convert GB to bytes + + # Allocate workspace on GPU + with torch.cuda.device(device): + self.workspace = torch.empty( + self.total_size // 4, # Divide by 4 because we use float32 + dtype=torch.float32, + device=f"cuda:{device}", + ) + + # Initialize chunk management + self.chunks: List[MemoryChunk] = [ + MemoryChunk(offset=0, size=self.total_size, in_use=False) + ] + + # Container for reusable sync buffers + self.sync_buffer_pool: List[str] = [] + + logger.info( + f"WorkspaceBuffer initialized: {size_gb}GB on cuda:{device}, " + f"ptr={self.workspace.data_ptr():#x}" + ) + + def allocate(self, size_bytes: int) -> Optional[Tuple[int, str]]: + """Allocate a chunk from the workspace. 
+ + Args: + size_bytes: Required size in bytes + + Returns: + Tuple of (offset, sync_shm_name) if successful, None if no space + """ + # Find a free chunk that's large enough + for i, chunk in enumerate(self.chunks): + if not chunk.in_use and chunk.size >= size_bytes: + # Mark chunk as in use + chunk.in_use = True + + # Get or create sync buffer + if self.sync_buffer_pool: + sync_shm_name = self.sync_buffer_pool.pop() + # Reset sync flag to 0 (not ready) + self._reset_sync_buffer(sync_shm_name) + else: + sync_shm_name = self._create_sync_buffer() + + chunk.sync_shm_name = sync_shm_name + + # If chunk is larger than needed, split it + if chunk.size > size_bytes: + # Create a new free chunk for the remaining space + new_chunk = MemoryChunk( + offset=chunk.offset + size_bytes, + size=chunk.size - size_bytes, + in_use=False, + ) + chunk.size = size_bytes + self.chunks.insert(i + 1, new_chunk) + + logger.debug( + f"Allocated chunk: offset={chunk.offset}, size={size_bytes}, " + f"sync_shm={sync_shm_name}" + ) + return chunk.offset, sync_shm_name + + logger.warning(f"WorkspaceBuffer: No space for {size_bytes} bytes") + return None + + def release(self, offset: int) -> None: + """Release a chunk back to the pool. 
+ + Args: + offset: Offset of the chunk to release + """ + for i, chunk in enumerate(self.chunks): + if chunk.offset == offset and chunk.in_use: + chunk.in_use = False + + # Return sync buffer to pool + if chunk.sync_shm_name: + self.sync_buffer_pool.append(chunk.sync_shm_name) + chunk.sync_shm_name = None + + # Try to merge with adjacent free chunks + self._merge_chunks() + + logger.debug(f"Released chunk: offset={offset}") + return + + logger.warning(f"Attempted to release unknown chunk at offset {offset}") + + def _merge_chunks(self) -> None: + """Merge adjacent free chunks to reduce fragmentation.""" + i = 0 + while i < len(self.chunks) - 1: + current = self.chunks[i] + next_chunk = self.chunks[i + 1] + + if not current.in_use and not next_chunk.in_use: + # Merge chunks + current.size += next_chunk.size + + # Keep first chunk's sync buffer, return second to pool + if next_chunk.sync_shm_name: + self.sync_buffer_pool.append(next_chunk.sync_shm_name) + + self.chunks.pop(i + 1) + else: + i += 1 + + def _create_sync_buffer(self) -> str: + """Create a new shared memory sync buffer (8 bytes, initialized to 0).""" + shm_name = f"pymllm_sync_{uuid.uuid4().hex[:12]}" + shm = SharedMemory(name=shm_name, create=True, size=8) + # Initialize to 0 (not ready) + shm.buf[:8] = struct.pack("Q", 0) + shm.close() + logger.debug(f"Created sync buffer: {shm_name}") + return shm_name + + def _reset_sync_buffer(self, shm_name: str) -> None: + """Reset sync buffer to 0 (not ready).""" + try: + shm = SharedMemory(name=shm_name, create=False) + shm.buf[:8] = struct.pack("Q", 0) + shm.close() + except Exception as e: + logger.warning(f"Failed to reset sync buffer {shm_name}: {e}") + + def copy_tensor_to_workspace(self, tensor: torch.Tensor, offset: int) -> None: + """Copy tensor data to workspace at given offset. 
+ + Args: + tensor: Source tensor (must be on same CUDA device) + offset: Byte offset in workspace + """ + if not tensor.is_cuda or tensor.device.index != self.device: + raise ValueError(f"Tensor must be on cuda:{self.device}") + + size_bytes = tensor.numel() * tensor.element_size() + + # Get view of workspace at offset + offset_elements = offset // 4 # Workspace is float32 + num_elements = (size_bytes + 3) // 4 # Round up + + workspace_view = self.workspace[ + offset_elements : offset_elements + num_elements + ] + + # Copy tensor data (flatten and cast to float32 view) + tensor_flat = tensor.flatten().view(torch.uint8) + workspace_flat = workspace_view.view(torch.uint8)[: tensor_flat.numel()] + workspace_flat.copy_(tensor_flat) + + logger.debug(f"Copied tensor {tensor.shape} to workspace offset {offset}") + + def get_ipc_handle(self) -> bytes: + """Get CUDA IPC handle for the workspace buffer. + + Returns: + CUDA IPC handle as bytes + """ + # Get IPC handle using torch.cuda API + # Note: This requires CUDA-capable device with IPC support + handle = cuda.cudart().cudaIpcGetMemHandle(self.workspace.data_ptr()) + return bytes(handle) + + def cleanup(self) -> None: + """Cleanup all sync buffers.""" + all_shm_names = set() + for chunk in self.chunks: + if chunk.sync_shm_name: + all_shm_names.add(chunk.sync_shm_name) + all_shm_names.update(self.sync_buffer_pool) + + for shm_name in all_shm_names: + try: + shm = SharedMemory(name=shm_name, create=False) + shm.close() + shm.unlink() + except FileNotFoundError: + pass + except Exception as e: + logger.warning(f"Failed to cleanup sync buffer {shm_name}: {e}") + + logger.info("WorkspaceBuffer cleaned up") + + +@dataclass +class TensorMetadata: + """Metadata for reconstructing a tensor from CUDA IPC handle.""" + + shape: Tuple[int, ...] 
+ dtype: torch.dtype + offset: int # Byte offset in workspace + size_bytes: int + sync_shm_name: str # Shared memory name for sync flag + + +class CudaIPCTransport: + """Transport for sharing CUDA tensors via IPC handles.""" + + def __init__( + self, + workspace_size_gb: float = 4.0, + device: int = 0, + ): + """Initialize CUDA IPC transport. + + Args: + workspace_size_gb: Size of workspace buffer in GB + device: CUDA device ID + """ + self.device = device + self.workspace = WorkspaceBuffer(workspace_size_gb, device) + self.ipc_handle = self.workspace.get_ipc_handle() + self.queue: Queue = Queue() + + def send_tensor(self, rid: str, tensor: torch.Tensor) -> bool: + """Send a tensor via CUDA IPC. + + Args: + rid: Request ID + tensor: Tensor to send (must be on CUDA) + + Returns: + True if sent via CUDA IPC, False if fallback needed + """ + if not tensor.is_cuda: + logger.debug(f"Tensor for {rid} not on CUDA, skipping IPC") + return False + + size_bytes = tensor.numel() * tensor.element_size() + + # Try to allocate from workspace + result = self.workspace.allocate(size_bytes) + if result is None: + logger.warning( + f"WorkspaceBuffer full, falling back to shared queue for {rid}" + ) + return False + + offset, sync_shm_name = result + + # Copy tensor to workspace + self.workspace.copy_tensor_to_workspace(tensor, offset) + + # Create metadata + metadata = TensorMetadata( + shape=tuple(tensor.shape), + dtype=tensor.dtype, + offset=offset, + size_bytes=size_bytes, + sync_shm_name=sync_shm_name, + ) + + # Send metadata through queue + self.queue.put((rid, metadata, self.ipc_handle)) + + logger.debug(f"Sent tensor {tensor.shape} for {rid} via CUDA IPC") + return True + + def receive_tensor( + self, timeout: float = 0.0001 + ) -> Optional[Tuple[str, torch.Tensor]]: + """Receive a tensor via CUDA IPC. 
+ + Args: + timeout: Timeout for queue.get + + Returns: + Tuple of (rid, tensor) or None if queue empty + """ + try: + rid, metadata, ipc_handle = self.queue.get(timeout=timeout) + except Exception: + return None + + # Open IPC memory handle + # Note: This creates a tensor view into the remote process's workspace + with torch.cuda.device(self.device): + # Reconstruct tensor from IPC handle + # This is a view into remote memory, we need to copy it locally + + # For now, use a simpler approach: signal to copy later + # In production, you'd use cuda.cudart().cudaIpcOpenMemHandle + + logger.warning( + "CUDA IPC receive not fully implemented - requires cudaIpcOpenMemHandle" + ) + # TODO: Implement actual IPC handle opening + + # Create local tensor and signal copy completion + tensor = torch.empty( + metadata.shape, dtype=metadata.dtype, device=f"cuda:{self.device}" + ) + + # Mark chunk as ready for reuse by setting sync flag + self._mark_chunk_reusable(metadata.sync_shm_name) + + return rid, tensor + + def _mark_chunk_reusable(self, sync_shm_name: str) -> None: + """Mark a chunk as reusable by setting sync flag to 1.""" + try: + shm = SharedMemory(name=sync_shm_name, create=False) + shm.buf[:8] = struct.pack("Q", 1) # Set to 1 (ready for reuse) + shm.close() + logger.debug(f"Marked chunk reusable: {sync_shm_name}") + except Exception as e: + logger.error(f"Failed to mark chunk reusable {sync_shm_name}: {e}") + + def cleanup(self) -> None: + """Cleanup resources.""" + self.workspace.cleanup() + self.queue.close() diff --git a/pymllm/orchestrator/model_runner_process.py b/pymllm/orchestrator/model_runner_process.py index 4b28645e..b60966dd 100644 --- a/pymllm/orchestrator/model_runner_process.py +++ b/pymllm/orchestrator/model_runner_process.py @@ -7,7 +7,7 @@ import logging from multiprocessing.connection import Connection -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional import zmq @@ -41,10 +41,16 @@ def __init__( def 
init_sockets(self) -> None: self._zmq_ctx = zmq.Context() self._recv_from_scheduler = create_zmq_socket( - self._zmq_ctx, zmq.PULL, self._recv_from_scheduler_addr, bind=False, + self._zmq_ctx, + zmq.PULL, + self._recv_from_scheduler_addr, + bind=False, ) self._send_to_scheduler = create_zmq_socket( - self._zmq_ctx, zmq.PUSH, self._send_to_scheduler_addr, bind=False, + self._zmq_ctx, + zmq.PUSH, + self._send_to_scheduler_addr, + bind=False, ) def event_loop(self) -> None: @@ -62,18 +68,41 @@ def event_loop(self) -> None: def _forward_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]: """Run the model forward pass and sampling for *batch*. + *batch* is a dict produced by ``SchedulerProcess.get_next_batch_to_run`` + whose ``"requests"`` list contains + :class:`~pymllm.engine.io_struct.TokenizedGenerateReqInput` objects. + + Returns a dict ``{"batch_id": ..., "finished": [...], "unfinished": [...]}`` + where each element of *finished* / *unfinished* is a plain output dict + containing at least ``"rid"`` and ``"output_token_ids"``. + TODO: implement real forward pass, logits processing, and sampling. """ requests = batch.get("requests", []) - finished = [] - unfinished = [] + finished: List[Dict[str, Any]] = [] + unfinished: List[Dict[str, Any]] = [] for req in requests: - # TODO: actual model forward, logits -> next_token_ids - next_token_ids = [] # placeholder - req["output_token_ids"] = req.get("output_token_ids", []) + next_token_ids - # TODO: check EOS / max_tokens to decide finished vs. unfinished - finished.append(req) + # Support both TokenizedGenerateReqInput dataclass (normal path) and + # legacy plain dicts (defensive). 
+ rid: str = req.rid if hasattr(req, "rid") else req.get("rid") + input_ids: List[int] = ( + req.input_ids if hasattr(req, "input_ids") else req.get("input_ids", []) + ) + mm_inputs: Optional[Dict[str, Any]] = ( + req.mm_inputs if hasattr(req, "mm_inputs") else req.get("mm_inputs") + ) + + # TODO: actual model forward; pass input_ids and mm_inputs to the model. + next_token_ids: List[int] = [] # placeholder + + output: Dict[str, Any] = { + "rid": rid, + "output_token_ids": next_token_ids, + "finished": True, + } + # TODO: check EOS / max_tokens to decide finished vs. unfinished. + finished.append(output) return { "batch_id": batch.get("batch_id"), diff --git a/pymllm/orchestrator/request_response_process.py b/pymllm/orchestrator/request_response_process.py index 74335428..fa9d92ec 100644 --- a/pymllm/orchestrator/request_response_process.py +++ b/pymllm/orchestrator/request_response_process.py @@ -14,7 +14,7 @@ import asyncio import dataclasses import logging -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union import zmq import zmq.asyncio @@ -82,19 +82,39 @@ def start(self, loop: asyncio.AbstractEventLoop) -> None: ) self._loop_task = loop.create_task(self._run()) - async def add_request(self, request: GenerateReqInput) -> ReqState: - """Enqueue a request and return its :class:`ReqState`. + async def add_request( + self, request: GenerateReqInput + ) -> Union[ReqState, List[ReqState]]: + """Enqueue request(s) and return the corresponding :class:`ReqState`(s). + + * **Single request** (``request.is_single is True``): behaves exactly as + before – registers one ``ReqState`` and enqueues one message. + * **Batch request** (``request.is_single is False``): splits the batch + into *N* individual sub-requests, registers a ``ReqState`` per rid, and + enqueues each sub-request separately so the downstream pipeline sees + independent messages. Returns a ``List[ReqState]`` in the same order + as the input rids. 
Callers should ``await state.event.wait()`` in a loop, consuming ``state.out_list`` entries until ``state.finished`` is ``True``. """ - if not isinstance(request.rid, str): - raise ValueError("RequestResponseProcess currently accepts single requests only.") - rid = request.rid - state = ReqState() - self._rid_to_state[rid] = state - await self._request_queue.put(request.to_request_dict()) - return state + if request.is_single: + rid = request.rid if isinstance(request.rid, str) else request.rid[0] + state = ReqState() + self._rid_to_state[rid] = state + await self._request_queue.put(request.to_request_dict()) + return state + + # Batch path: fan-out into individual sub-requests. + states: List[ReqState] = [] + for i in range(request.batch_size): + sub = request[i] + rid = sub.rid if isinstance(sub.rid, str) else str(sub.rid) + state = ReqState() + self._rid_to_state[rid] = state + await self._request_queue.put(sub.to_request_dict()) + states.append(state) + return states def remove_state(self, rid: str) -> None: """Remove the ``ReqState`` for *rid* (called by the caller once done).""" diff --git a/pymllm/orchestrator/scheduler_process.py b/pymllm/orchestrator/scheduler_process.py index e7394dab..64ea55b0 100644 --- a/pymllm/orchestrator/scheduler_process.py +++ b/pymllm/orchestrator/scheduler_process.py @@ -5,6 +5,10 @@ batches, dispatches batches to the ModelRunnerProcess for forward passes, collects results, and streams finished token IDs to the DetokenizerProcess. +Supports two modes: + 1. Legacy ZMQ path: Receive TokenizedGenerateReqInput via ZMQ recv_pyobj + 2. 
Shared queue fast path: Read rid from shared queue and metadata from shared memory + The main ``event_loop`` scheduler flow:: while True: @@ -18,6 +22,7 @@ """ import logging +import queue as stdlib_queue import time from collections import deque from multiprocessing.connection import Connection @@ -25,7 +30,9 @@ import zmq +from pymllm.engine.io_struct import TokenizedGenerateReqInput from pymllm.orchestrator.ipc_utils import create_zmq_socket +from pymllm.orchestrator.shared_memory_queue import SharedMemoryManager, TensorQueue logger = logging.getLogger(__name__) @@ -39,6 +46,8 @@ def __init__( send_to_model_runner_addr: str, recv_from_model_runner_addr: str, send_to_detokenizer_addr: str, + shared_queue: Optional[TensorQueue] = None, + enable_shared_queue: bool = False, ): # ZMQ addresses self._recv_from_tokenizer_addr = recv_from_tokenizer_addr @@ -46,6 +55,10 @@ def __init__( self._recv_from_model_runner_addr = recv_from_model_runner_addr self._send_to_detokenizer_addr = send_to_detokenizer_addr + # Shared queue configuration + self._shared_queue = shared_queue + self._enable_shared_queue = enable_shared_queue + # ZMQ runtime objects (initialised in init_sockets) self._zmq_ctx: Optional[zmq.Context] = None self._recv_from_tokenizer: Optional[zmq.Socket] = None @@ -55,7 +68,7 @@ def __init__( self._poller: Optional[zmq.Poller] = None # Request management - self._waiting_queue: Deque[Dict[str, Any]] = deque() + self._waiting_queue: Deque[TokenizedGenerateReqInput] = deque() self._running_batch: Optional[Dict[str, Any]] = None self._finished: List[Dict[str, Any]] = [] @@ -97,7 +110,10 @@ def init_sockets(self) -> None: def event_loop(self) -> None: """Infinite scheduling loop.""" - logger.info("SchedulerProcess event loop started") + logger.info( + "SchedulerProcess event loop started (shared_queue=%s)", + self._enable_shared_queue, + ) while True: self.recv_requests() self.process_input_requests() @@ -114,15 +130,80 @@ def event_loop(self) -> None: def 
recv_requests(self) -> None: """Non-blocking receive of tokenized requests from TokenizerProcess. - Uses ``zmq.Poller`` with a short timeout so the scheduler is never - stuck waiting when there are batches to run. + Supports two modes: + 1. Legacy ZMQ: Uses ``zmq.Poller`` with a short timeout + 2. Shared queue: Non-blocking get from multiprocessing.Queue + + Messages are either: + * A :class:`~pymllm.engine.io_struct.TokenizedGenerateReqInput` + dataclass – appended to ``_waiting_queue``. + * A plain abort sentinel dict ``{"rid": ..., "abort": True}`` – handled + inline by removing the matching rid from the waiting queue. """ + if self._enable_shared_queue and self._shared_queue is not None: + self._recv_from_shared_queue() + else: + self._recv_from_zmq() + + def _recv_from_zmq(self) -> None: + """Receive requests via legacy ZMQ path.""" while True: events = dict(self._poller.poll(timeout=0)) # non-blocking if self._recv_from_tokenizer not in events: break - req = self._recv_from_tokenizer.recv_pyobj(zmq.NOBLOCK) - self._waiting_queue.append(req) + msg = self._recv_from_tokenizer.recv_pyobj(zmq.NOBLOCK) + # Abort sentinel: plain dict with "abort" key. 
+ if isinstance(msg, dict) and msg.get("abort"): + rid = msg.get("rid") + logger.debug("Scheduler received abort for rid=%s", rid) + self._waiting_queue = type(self._waiting_queue)( + r for r in self._waiting_queue if r.rid != rid + ) + else: + self._waiting_queue.append(msg) + + def _recv_from_shared_queue(self) -> None: + """Receive requests via shared memory + shared queue fast path.""" + while True: + try: + # Non-blocking get from shared queue + rid, shm_name, mm_inputs = self._shared_queue.get(timeout=0.0001) + + # Read metadata from shared memory (and unlink immediately) + metadata: TokenizedGenerateReqInput = SharedMemoryManager.read_metadata( + shm_name, unlink=True + ) + + # Reconstruct the full TokenizedGenerateReqInput with mm_inputs + full_request = TokenizedGenerateReqInput( + rid=metadata.rid, + input_text=metadata.input_text, + input_ids=metadata.input_ids, + mm_inputs=mm_inputs, # Restored from shared queue + sampling_params=metadata.sampling_params, + stream=metadata.stream, + return_logprob=metadata.return_logprob, + logprob_start_len=metadata.logprob_start_len, + top_logprobs_num=metadata.top_logprobs_num, + lora_path=metadata.lora_path, + session_params=metadata.session_params, + ) + + self._waiting_queue.append(full_request) + logger.debug(f"Received request {rid} from shared queue") + + except stdlib_queue.Empty: + # No more requests available + break + except Exception as e: + logger.error(f"Error receiving from shared queue: {e}", exc_info=True) + # Try to cleanup shared memory if possible + try: + if "shm_name" in locals(): + SharedMemoryManager.cleanup(shm_name) + except: + pass + break # ------------------------------------------------------------------ # Step 2: process input requests @@ -227,6 +308,8 @@ def run_scheduler_process( recv_from_model_runner_addr: str, send_to_detokenizer_addr: str, pipe_writer: Connection, + shared_queue: Optional[TensorQueue] = None, + enable_shared_queue: bool = False, ) -> None: """Entry point for 
``torch.multiprocessing.Process(target=...)``.""" proc = SchedulerProcess( @@ -234,6 +317,8 @@ def run_scheduler_process( send_to_model_runner_addr, recv_from_model_runner_addr, send_to_detokenizer_addr, + shared_queue=shared_queue, + enable_shared_queue=enable_shared_queue, ) proc.init_sockets() diff --git a/pymllm/orchestrator/shared_memory_queue.py b/pymllm/orchestrator/shared_memory_queue.py new file mode 100644 index 00000000..3d26ebf1 --- /dev/null +++ b/pymllm/orchestrator/shared_memory_queue.py @@ -0,0 +1,190 @@ +""" +Shared memory and queue utilities for fast IPC between tokenizer and scheduler. + +This module implements shared-queue fast path to avoid expensive +ZMQ serialization of large multimodal tensors. + +Design: + - Metadata lane: Small tokenized objects stored in shared memory keyed by rid + - Tensor lane: Large tensors made shareable via share_memory_() and passed by handle +""" + +import logging +import pickle +import uuid +from multiprocessing import Queue +from multiprocessing.shared_memory import SharedMemory +from typing import Any, Dict, Optional + +import torch + +logger = logging.getLogger(__name__) + + +class SharedMemoryManager: + """Manages shared memory segments for passing metadata between processes. + + Each tokenized request's metadata is written to a unique shared memory segment + keyed by its request ID (rid). The scheduler reads and immediately unlinks the + segment to prevent memory leaks. + """ + + @staticmethod + def write_metadata(rid: str, metadata: Any) -> str: + """Write metadata to shared memory and return the segment name. 
+ + Args: + rid: Request ID (used as part of the shared memory name) + metadata: Serializable metadata object + + Returns: + str: The shared memory segment name + """ + # Serialize the metadata + data = pickle.dumps(metadata) + size = len(data) + + # Create unique shared memory segment name + shm_name = f"pymllm_meta_{rid}_{uuid.uuid4().hex[:8]}" + + try: + # Create shared memory segment + shm = SharedMemory(name=shm_name, create=True, size=size) + # Write data + shm.buf[:size] = data + shm.close() + logger.debug(f"Wrote {size} bytes to shared memory {shm_name}") + return shm_name + except Exception as e: + logger.error(f"Failed to write metadata to shared memory: {e}") + raise + + @staticmethod + def read_metadata(shm_name: str, unlink: bool = True) -> Any: + """Read metadata from shared memory and optionally unlink it. + + Args: + shm_name: The shared memory segment name + unlink: If True, immediately unlink the segment after reading + + Returns: + The deserialized metadata object + """ + try: + # Open existing shared memory segment + shm = SharedMemory(name=shm_name, create=False) + # Read and deserialize data + data = bytes(shm.buf[:]) + metadata = pickle.loads(data) + shm.close() + + # Unlink to free memory immediately + if unlink: + try: + shm.unlink() + logger.debug(f"Read and unlinked shared memory {shm_name}") + except FileNotFoundError: + # Already unlinked, ignore + pass + + return metadata + except Exception as e: + logger.error(f"Failed to read metadata from shared memory {shm_name}: {e}") + raise + + @staticmethod + def cleanup(shm_name: str) -> None: + """Manually cleanup a shared memory segment (for error recovery).""" + try: + shm = SharedMemory(name=shm_name, create=False) + shm.close() + shm.unlink() + logger.debug(f"Cleaned up shared memory {shm_name}") + except FileNotFoundError: + pass # Already cleaned up + except Exception as e: + logger.warning(f"Failed to cleanup shared memory {shm_name}: {e}") + + +class TensorQueue: + """Queue for 
passing large tensors between processes using shared memory. + + Tensors are made shareable via .share_memory_() and passed through a + multiprocessing.Queue by handle (metadata only, not the actual data). + """ + + def __init__(self, maxsize: int = 0): + """Initialize the tensor queue. + + Args: + maxsize: Maximum queue size (0 for unlimited) + """ + self._queue: Queue = Queue(maxsize=maxsize) + + def put(self, rid: str, shm_name: str, mm_inputs: Optional[Dict[str, Any]]) -> None: + """Put a request with multimodal inputs into the queue. + + Args: + rid: Request ID + shm_name: Shared memory segment name for metadata + mm_inputs: Multimodal inputs dict (can contain torch tensors) + """ + # Make tensors shareable if present + if mm_inputs is not None: + mm_inputs = self._make_tensors_shareable(mm_inputs) + + self._queue.put((rid, shm_name, mm_inputs)) + logger.debug(f"Put request {rid} into tensor queue (shm={shm_name})") + + def get( + self, timeout: Optional[float] = None + ) -> tuple[str, str, Optional[Dict[str, Any]]]: + """Get a request from the queue. + + Args: + timeout: Timeout in seconds (None for blocking indefinitely) + + Returns: + Tuple of (rid, shm_name, mm_inputs) + """ + rid, shm_name, mm_inputs = self._queue.get(timeout=timeout) + logger.debug(f"Got request {rid} from tensor queue (shm={shm_name})") + return rid, shm_name, mm_inputs + + def empty(self) -> bool: + """Check if the queue is empty.""" + return self._queue.empty() + + def qsize(self) -> int: + """Return the approximate size of the queue.""" + try: + return self._queue.qsize() + except NotImplementedError: + return 0 # Some platforms don't support qsize + + def close(self) -> None: + """Close the queue.""" + self._queue.close() + + @staticmethod + def _make_tensors_shareable(data: Any) -> Any: + """Recursively make all torch tensors in a data structure shareable. 
+ + Args: + data: Nested dict/list/tensor structure + + Returns: + The same structure with tensors made shareable via share_memory_() + """ + if isinstance(data, torch.Tensor): + # Make tensor shareable across processes + if not data.is_shared(): + data = data.share_memory_() + return data + elif isinstance(data, dict): + return {k: TensorQueue._make_tensors_shareable(v) for k, v in data.items()} + elif isinstance(data, (list, tuple)): + result = [TensorQueue._make_tensors_shareable(item) for item in data] + return type(data)(result) + else: + return data diff --git a/pymllm/orchestrator/tokenizer_process.py b/pymllm/orchestrator/tokenizer_process.py index 53714bb6..43db5ba0 100644 --- a/pymllm/orchestrator/tokenizer_process.py +++ b/pymllm/orchestrator/tokenizer_process.py @@ -3,15 +3,22 @@ Receives raw requests from RequestResponseProcess via ZMQ, tokenizes them, and forwards the tokenized payloads to the SchedulerProcess. + +Supports two modes: + 1. Legacy ZMQ path: Send TokenizedGenerateReqInput via ZMQ send_pyobj + 2. Shared queue fast path: Write metadata to shared memory and put rid in shared queue """ import logging from multiprocessing.connection import Connection -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional, Union import zmq +from transformers import AutoProcessor, AutoTokenizer +from pymllm.engine.io_struct import TokenizedGenerateReqInput from pymllm.orchestrator.ipc_utils import create_zmq_socket +from pymllm.orchestrator.shared_memory_queue import SharedMemoryManager, TensorQueue logger = logging.getLogger(__name__) @@ -23,16 +30,42 @@ def __init__( self, recv_from_rr_addr: str, send_to_scheduler_addr: str, + tokenizer_cfg: Dict[str, Any], + shared_queue: Optional[TensorQueue] = None, ): + """ + Parameters + ---------- + tokenizer_cfg: + Serialisable dict built by the parent process (``Engine``) before + spawning. Required keys: + + * ``tokenizer_path`` – str, path to the tokenizer directory. 
+ * ``tokenizer_mode`` – ``"auto" | "slow" | "fast"``. + * ``trust_remote_code`` – bool. + * ``context_length`` – Optional[int], explicit cap; inferred from + ``hf_config`` when ``None``. + * ``hf_config`` – Optional HuggingFace PretrainedConfig + (pickled by multiprocessing); used only to infer ``context_length``. + * ``enable_shared_queue`` – bool, whether to use shared memory fast path. + shared_queue: + Optional TensorQueue for shared memory fast path communication. + """ self._recv_from_rr_addr = recv_from_rr_addr self._send_to_scheduler_addr = send_to_scheduler_addr + self._tokenizer_cfg = tokenizer_cfg + self._enable_shared_queue = tokenizer_cfg.get("enable_shared_queue", False) + self._shared_queue = shared_queue - self._zmq_ctx: zmq.Context = None - self._recv_from_rr: zmq.Socket = None - self._send_to_scheduler: zmq.Socket = None + self._zmq_ctx: Optional[zmq.Context] = None + self._recv_from_rr: Optional[zmq.Socket] = None + self._send_to_scheduler: Optional[zmq.Socket] = None - # TODO: initialise the actual tokenizer (HuggingFace / custom) self._tokenizer = None + self._mm_processor = None + self._context_length: Optional[int] = None + + self._init_tokenizers() # ------------------------------------------------------------------ # Lifecycle @@ -55,29 +88,269 @@ def init_sockets(self) -> None: def event_loop(self) -> None: """Infinite loop: recv raw request -> tokenize -> send to scheduler.""" - logger.info("TokenizerProcess event loop started") + logger.info( + "TokenizerProcess event loop started (shared_queue=%s)", + self._enable_shared_queue, + ) while True: raw_request: Dict[str, Any] = self._recv_from_rr.recv_pyobj() tokenized = self._tokenize(raw_request) + + if self._enable_shared_queue and self._shared_queue is not None: + # Shared queue fast path + self._send_via_shared_queue(tokenized) + else: + # Legacy ZMQ path + self._send_to_scheduler.send_pyobj(tokenized) + + def _send_via_shared_queue( + self, tokenized: Union[TokenizedGenerateReqInput, 
Dict[str, Any]] + ) -> None: + """Send tokenized request via shared memory + shared queue fast path. + + Args: + tokenized: Either TokenizedGenerateReqInput dataclass or abort dict + """ + # Handle abort sentinel + if isinstance(tokenized, dict) and tokenized.get("abort"): + # Fallback to ZMQ for abort messages self._send_to_scheduler.send_pyobj(tokenized) + return + + assert isinstance(tokenized, TokenizedGenerateReqInput), ( + f"Expected TokenizedGenerateReqInput, got {type(tokenized)}" + ) + + rid = tokenized.rid + mm_inputs = tokenized.mm_inputs + + # Create a lightweight metadata object (without mm_inputs) + metadata = TokenizedGenerateReqInput( + rid=tokenized.rid, + input_text=tokenized.input_text, + input_ids=tokenized.input_ids, + mm_inputs=None, # Will be passed separately via shared queue + sampling_params=tokenized.sampling_params, + stream=tokenized.stream, + return_logprob=tokenized.return_logprob, + logprob_start_len=tokenized.logprob_start_len, + top_logprobs_num=tokenized.top_logprobs_num, + lora_path=tokenized.lora_path, + session_params=tokenized.session_params, + ) + + # Write metadata to shared memory + shm_name = SharedMemoryManager.write_metadata(rid, metadata) + + # Put (rid, shm_name, mm_inputs) into shared queue + self._shared_queue.put(rid, shm_name, mm_inputs) + + logger.debug(f"Sent request {rid} via shared queue (shm={shm_name})") # ------------------------------------------------------------------ - # Tokenization (placeholder) + # Tokenization and multimodal preprocessing # ------------------------------------------------------------------ - def _tokenize(self, raw_request: Dict[str, Any]) -> Dict[str, Any]: - """Tokenize a single raw request and return the tokenized payload. + def _init_tokenizers(self) -> None: + """Initialise text tokenizer and (optionally) multimodal processor. - TODO: replace with real tokenizer call. 
+ All configuration is read from ``self._tokenizer_cfg`` which was + serialised by the parent process before ``spawn``. No global config + access happens inside the subprocess. """ - text = raw_request.get("text", "") - # placeholder: produce fake token ids - input_ids: List[int] = [] # TODO: self._tokenizer.encode(text) - return { - **raw_request, - "input_ids": input_ids, + cfg = self._tokenizer_cfg + tokenizer_path: str = cfg["tokenizer_path"] + tokenizer_mode: str = cfg.get("tokenizer_mode", "auto") + trust_remote_code: bool = bool(cfg.get("trust_remote_code", False)) + + tokenizer_kwargs: Dict[str, Any] = { + "use_fast": tokenizer_mode != "slow", + "trust_remote_code": trust_remote_code, } + self._tokenizer = AutoTokenizer.from_pretrained( + tokenizer_path, + **tokenizer_kwargs, + ) + + # Default to left padding for generation. + try: + self._tokenizer.padding_side = "left" + except Exception: + pass + + # Context length: explicit config value takes priority; fall back to + # common HF config field names. + context_len: Optional[int] = cfg.get("context_length") + if context_len is None: + hf_cfg = cfg.get("hf_config") + for name in ("max_position_embeddings", "max_sequence_length", "seq_len"): + if hf_cfg is not None and hasattr(hf_cfg, name): + context_len = int(getattr(hf_cfg, name)) + break + self._context_length = context_len + + # Try to load multimodal processor (optional). + try: + self._mm_processor = AutoProcessor.from_pretrained( + tokenizer_path, + trust_remote_code=trust_remote_code, + ) + except Exception: + # Text-only models don't provide a processor; that's fine. + self._mm_processor = None + + def _tokenize( + self, raw_request: Dict[str, Any] + ) -> Union[TokenizedGenerateReqInput, Dict[str, Any]]: + """Tokenize one raw request dict and return a typed object. + + * **Abort** messages (``{"rid": ..., "abort": True}``) are returned as + plain dicts so the scheduler can intercept them without importing the + io_struct. 
+ * Normal requests are returned as a :class:`TokenizedGenerateReqInput` + dataclass instance that carries ``input_ids``, ``mm_inputs``, and all + sampling meta-data in typed fields. + + Each message arriving here corresponds to exactly one sub-request + because batch splitting happens upstream in ``RequestResponseProcess``. + """ + # Abort: propagate as a plain sentinel dict. + if raw_request.get("abort"): + return {"rid": raw_request.get("rid"), "abort": True} + + # ------------------------------------------------------------------ # + # 1. Text tokenization + # ------------------------------------------------------------------ # + if raw_request.get("input_ids") is not None: + # Caller already tokenized – skip text processing. + input_ids: List[int] = list(raw_request["input_ids"]) + raw_text = raw_request.get("text") + input_text: str = ( + str(raw_text[0]) if isinstance(raw_text, list) else str(raw_text or "") + ) + else: + text = raw_request.get("text") + if text is None: + raise ValueError( + "TokenizerProcess expects either `text` or `input_ids`." + ) + # Accept a list for robustness; take the first element. + input_text = str(text[0]) if isinstance(text, list) else str(text) + + encode_kwargs: Dict[str, Any] = { + "add_special_tokens": True, + "return_attention_mask": False, + } + if self._context_length is not None: + encode_kwargs.update( + {"truncation": True, "max_length": self._context_length} + ) + + encoding = self._tokenizer(input_text, **encode_kwargs) + input_ids = encoding["input_ids"] + + # ------------------------------------------------------------------ # + # 2. Multimodal pre-processing + # ------------------------------------------------------------------ # + mm_inputs = self._collect_mm_inputs(raw_request, text=input_text) + + # ------------------------------------------------------------------ # + # 3. 
Pack into the typed dataclass + # ------------------------------------------------------------------ # + return TokenizedGenerateReqInput( + rid=raw_request.get("rid"), + input_text=input_text, + input_ids=input_ids, + mm_inputs=mm_inputs, + sampling_params=raw_request.get("sampling_params") or {}, + stream=bool(raw_request.get("stream", False)), + return_logprob=bool(raw_request.get("return_logprob", False)), + logprob_start_len=int(raw_request.get("logprob_start_len", -1)), + top_logprobs_num=int(raw_request.get("top_logprobs_num", 0)), + lora_path=raw_request.get("lora_path"), + session_params=raw_request.get("session_params"), + ) + + def _normalize_image_input(self, image_data: Any) -> List[Any]: + """Normalise ``image_data`` into a list of image-like objects. + + Supported input forms: + - single PIL.Image / numpy array / torch.Tensor + - path string or bytes + - list/tuple of the above + """ + + def _to_image(obj: Any) -> Any: + # Lazily import Pillow to avoid hard dependency for text-only models. + try: + from PIL import Image # type: ignore + except Exception as exc: # pragma: no cover - optional dependency + raise RuntimeError( + "Pillow is required for image preprocessing in TokenizerProcess" + ) from exc + + if obj is None: + return None + if isinstance(obj, Image.Image): + return obj + if isinstance(obj, (str, bytes)): + return Image.open(obj) + return obj + + if isinstance(image_data, (list, tuple)): + return [ + img for img in (_to_image(x) for x in image_data) if img is not None + ] + return [img for img in (_to_image(image_data),) if img is not None] + + def _collect_mm_inputs( + self, raw_request: Dict[str, Any], text: Optional[str] = None + ) -> Optional[Dict[str, Any]]: + """Pre-process multimodal data and return a consolidated ``mm_inputs`` dict. + + Returns ``None`` for text-only requests. 
Otherwise returns a flat dict + whose keys are ready to be unpacked by the model runner: + + * ``image_inputs`` – output of ``AutoProcessor`` (contains + ``pixel_values``, etc.) when a processor is available. + * ``image_data`` – raw image objects when no processor is available. + * ``audio_data`` – forwarded verbatim (no processor yet). + * ``video_data`` – forwarded verbatim (no processor yet). + """ + image_data = raw_request.get("image_data") + video_data = raw_request.get("video_data") + audio_data = raw_request.get("audio_data") + + if not any(x is not None for x in (image_data, video_data, audio_data)): + return None # text-only request + + mm: Dict[str, Any] = {} + + # Image: prefer AutoProcessor output; fall back to raw data. + if image_data is not None: + if self._mm_processor is not None: + images = self._normalize_image_input(image_data) + try: + processor_inputs = self._mm_processor( + images=images, + text=text if text is not None else raw_request.get("text"), + return_tensors="pt", + ) + mm["image_inputs"] = processor_inputs + except Exception: + mm["image_data"] = image_data + else: + mm["image_data"] = image_data + + # Audio / video forwarded verbatim for now. 
+ if audio_data is not None: + mm["audio_data"] = audio_data + if video_data is not None: + mm["video_data"] = video_data + + return mm + def shutdown(self) -> None: if self._recv_from_rr is not None: self._recv_from_rr.close() @@ -91,9 +364,13 @@ def run_tokenizer_process( recv_from_rr_addr: str, send_to_scheduler_addr: str, pipe_writer: Connection, + tokenizer_cfg: Dict[str, Any], + shared_queue: Optional[TensorQueue] = None, ) -> None: """Entry point for ``torch.multiprocessing.Process(target=...)``.""" - proc = TokenizerProcess(recv_from_rr_addr, send_to_scheduler_addr) + proc = TokenizerProcess( + recv_from_rr_addr, send_to_scheduler_addr, tokenizer_cfg, shared_queue + ) proc.init_sockets() # Signal readiness to the parent process diff --git a/pyproject.toml b/pyproject.toml index d417b579..d752ddc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ dependencies=[ "typer", "torch", "torchao", + "pyfiglet", + "termcolor", ] [project.optional-dependencies] From b057360804e0b54a329faf81c73f2c84aceb1082 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Mon, 2 Mar 2026 06:45:16 +0000 Subject: [PATCH 11/13] feat(mllm-kernel): add high-performance create_kv_indices CUDA kernel and benchmark - Implement CUDA kernel to convert ReqToTokenPool mapping into flat KV index arrays - Use block-per-sequence parallelism for fully coalesced memory access - Validate tensor shapes, dtypes, and devices with TensorMatcher utilities - Provide Python JIT wrapper using mllm-kernel JIT system for easy integration - Add detailed documentation and usage guide for kernel implementation - Create benchmark script comparing kernel against naive PyTorch gather - Support optional start offsets for sliding-window decode scenarios - Ensure robust out-of-bounds checks to prevent segmentation faults - Establish testing and benchmarking patterns for future kernel development --- .claude/skills/impl-jit-kernel/SKILL.md | 486 +++++++++ 
.../benchmarks/bench_create_kv_indices.py | 218 ++++ .../cuda/csrc/create_kv_indices.cuh | 282 +++++ .../mllm_kernel/cuda/csrc/vocab_embedding.cuh | 0 .../mllm_kernel/cuda/jit/create_kv_indices.py | 118 +++ mllm-kernel/pyproject.toml | 2 +- mllm-kernel/tests/test_create_kv_indices.py | 191 ++++ pymllm/configs/server_config.py | 45 + pymllm/engine/__init__.py | 8 + pymllm/engine/forward_batch.py | 182 ++++ pymllm/engine/launch.py | 32 +- pymllm/layers/attention/__init__.py | 25 + pymllm/layers/attention/attention_backend.py | 143 +++ pymllm/layers/attention/flashinfer_backend.py | 964 ++++++++++++++++++ pymllm/layers/attention/radix_attention.py | 171 ++++ pymllm/layers/sampling.py | 0 pymllm/mem_cache/memory_pool.py | 16 +- pymllm/orchestrator/cuda_ipc_transport.py | 859 ++++++++++------ pymllm/orchestrator/scheduler_process.py | 50 +- pymllm/orchestrator/shared_memory_queue.py | 226 ++-- pymllm/orchestrator/tokenizer_process.py | 153 ++- 21 files changed, 3773 insertions(+), 398 deletions(-) create mode 100644 .claude/skills/impl-jit-kernel/SKILL.md create mode 100644 mllm-kernel/benchmarks/bench_create_kv_indices.py create mode 100644 mllm-kernel/mllm_kernel/cuda/csrc/create_kv_indices.cuh create mode 100644 mllm-kernel/mllm_kernel/cuda/csrc/vocab_embedding.cuh create mode 100644 mllm-kernel/mllm_kernel/cuda/jit/create_kv_indices.py create mode 100644 mllm-kernel/tests/test_create_kv_indices.py create mode 100644 pymllm/layers/sampling.py diff --git a/.claude/skills/impl-jit-kernel/SKILL.md b/.claude/skills/impl-jit-kernel/SKILL.md new file mode 100644 index 00000000..39cc02b6 --- /dev/null +++ b/.claude/skills/impl-jit-kernel/SKILL.md @@ -0,0 +1,486 @@ +--- +name: impl-jit-kernel +description: Guide for implementing CUDA or CPU JIT kernels in mllm-kernel. Use when the user asks to create, add, or implement a new kernel in mllm-kernel. 
+--- + +# Implementing a JIT Kernel in mllm-kernel + +## Overview + +mllm-kernel uses a JIT (Just-In-Time) compilation system built on `tvm_ffi`. Kernels are written in C++20 (`.cuh` for CUDA, `.cpp` for CPU), validated at runtime via `TensorMatcher`, and exposed to Python through a `@jit` decorator. No pre-compilation is needed -- kernels compile on first call and are cached at `~/.cache/mllm_kernel/`. + +## File Layout + +For a kernel named `my_kernel`: + +``` +mllm-kernel/ + mllm_kernel/ + cuda/ + csrc/my_kernel.cuh # CUDA kernel implementation + jit/my_kernel.py # Python JIT wrapper + jit/__init__.py # Add export here + cpu/ + csrc/my_kernel.cpp # CPU kernel implementation (Highway SIMD) + include/mllm_kernel/cpu/ + my_kernel.hpp # CPU SIMD body (NO #pragma once) + jit/my_kernel.py # Python JIT wrapper + jit/__init__.py # Add export here + tests/test_my_kernel.py # Pytest correctness tests + benchmarks/bench_my_kernel.py # Profiler benchmark vs PyTorch reference +``` + +--- + +## CUDA Kernel Walkthrough + +### Step 1: Write the `.cuh` kernel + +Create `mllm_kernel/cuda/csrc/my_kernel.cuh`: + +```cpp +#pragma once + +#include // TensorMatcher, SymbolicSize, SymbolicDevice, SymbolicDType +#include // RuntimeCheck, Panic, div_ceil +#include // LaunchKernel, fp16_t, bf16_t, PDL helpers + +#include +#include + +#include + +namespace { + +// --------------------------------------------------------------------------- +// 1. Parameter struct (trivially copyable, passed to kernel by value) +// --------------------------------------------------------------------------- +struct MyKernelParams { + const float* __restrict__ input; + float* __restrict__ output; + int32_t num_elements; +}; + +// --------------------------------------------------------------------------- +// 2. 
CUDA kernel
+// ---------------------------------------------------------------------------
+__global__ void my_kernel(const MyKernelParams params) {
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= params.num_elements) return;
+  params.output[idx] = params.input[idx] * 2.0f;
+}
+
+// ---------------------------------------------------------------------------
+// 3. Host-side launcher (entry point for TVM FFI binding)
+// ---------------------------------------------------------------------------
+struct MyKernel {
+  static void run(tvm::ffi::TensorView input, tvm::ffi::TensorView output) {
+    using namespace mllm_kernel::host;
+
+    // --- Validate tensors ---
+    SymbolicSize N{"num_elements"};
+    SymbolicDevice device;
+
+    (void)TensorMatcher({N})
+        .with_dtype<float>()
+        .with_device(device)
+        .verify(input);
+
+    (void)TensorMatcher({N})
+        .with_dtype<float>()
+        .with_device(device)
+        .verify(output);
+
+    const int64_t n = N.unwrap();
+    RuntimeCheck(n > 0, "num_elements must be positive, got ", n);
+
+    // --- Build params ---
+    MyKernelParams params{
+        .input = static_cast<const float*>(input.data_ptr()),
+        .output = static_cast<float*>(output.data_ptr()),
+        .num_elements = static_cast<int32_t>(n),
+    };
+
+    // --- Launch ---
+    constexpr int kBlock = 256;
+    const int grid = static_cast<int>(div_ceil(n, kBlock));
+    LaunchKernel(grid, kBlock, device.unwrap())(my_kernel, params);
+  }
+};
+
+}  // namespace
+```
+
+**Key rules:**
+
+- **Always wrap in `namespace {}`** (anonymous namespace).
+- **Entry point** is a `static void run(tvm::ffi::TensorView ...)` method.
+- **Validate every tensor** with `TensorMatcher` before reading `.data_ptr()`.
+- **Never dereference device pointers on host** -- `data_ptr()` returns a GPU pointer.
+- **Use `LaunchKernel`** to launch -- it handles stream resolution and error checking.
+ +### Step 2: Write the Python JIT wrapper + +Create `mllm_kernel/cuda/jit/my_kernel.py`: + +```python +"""JIT wrapper for my_kernel CUDA kernel.""" + +import torch +from mllm_kernel.jit_utils import jit + + +@jit( + args=[], + device="cuda", + cuda_files=["my_kernel.cuh"], + cpp_wrappers=[], + cuda_wrappers=[("my_kernel", "MyKernel::run")], + func_name="my_kernel", +) +def _kernel(compiled_module, input: torch.Tensor, output: torch.Tensor) -> None: + compiled_module.my_kernel(input, output) + + +def my_kernel(input: torch.Tensor) -> torch.Tensor: + """Double every element in *input*. + + Parameters + ---------- + input : torch.Tensor + 1-D float32 tensor on CUDA. + + Returns + ------- + torch.Tensor + Same shape and dtype as *input*. + """ + output = torch.empty_like(input) + _kernel(input, output) + return output +``` + +### Step 3: Export in `__init__.py` + +Edit `mllm_kernel/cuda/jit/__init__.py` and add: + +```python +from mllm_kernel.cuda.jit.my_kernel import my_kernel +``` + +### Step 4: Clear JIT cache after editing `.cuh` + +Any time you modify the `.cuh` file, delete the cached `.so`: + +```bash +rm -rf ~/.cache/mllm_kernel/cuda_my_kernel* +``` + +The next Python call will trigger recompilation automatically. + +--- + +## Template-Parameterized CUDA Kernels + +When the kernel takes compile-time constants (e.g. 
block size, dtype), use `make_cpp_args`: + +```python +from mllm_kernel.jit_utils import jit, make_cpp_args + +def _make_kernel(block_size: int, use_pdl: bool): + cpp_args = make_cpp_args(block_size, use_pdl) # -> "256, true" + + @jit( + args=[block_size, use_pdl], + device="cuda", + cuda_files=["my_kernel.cuh"], + cpp_wrappers=[], + cuda_wrappers=[("my_kernel", f"MyKernel<{cpp_args}>::run")], + func_name="my_kernel", + ) + def _kernel(compiled_module, input, output): + compiled_module.my_kernel(input, output) + return _kernel +``` + +`make_cpp_args` converts Python types to C++ literals: +- `int/float` -> string literal +- `bool` -> `"true"` / `"false"` +- `torch.dtype` -> C++ type (`torch.float32` -> `"fp32_t"`, `torch.float16` -> `"fp16_t"`, `torch.bfloat16` -> `"bf16_t"`, `torch.int32` -> `"int32_t"`, etc.) + +--- + +## CPU Kernel Walkthrough + +CPU kernels use **Google Highway** for portable SIMD. The key difference: the `.hpp` body is included **multiple times** by Highway's `foreach_target` dispatch, so it must NOT have `#pragma once`. + +### Step 1: Write the SIMD body (`.hpp`) + +Create `mllm_kernel/cpu/include/mllm_kernel/cpu/my_kernel.hpp`: + +```cpp +// NOTE: NO #pragma once -- this file is included multiple times by Highway. 
+
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace mllm_kernel::cpu {
+namespace HWY_NAMESPACE {
+namespace hn = hwy::HWY_NAMESPACE;
+
+template <int Constant>
+inline void my_kernel_impl(float* HWY_RESTRICT dst,
+                           const float* HWY_RESTRICT src,
+                           size_t count) {
+  const hn::ScalableTag<float> d;
+  const size_t lanes = hn::Lanes(d);
+  const auto vc = hn::Set(d, static_cast<float>(Constant));
+  size_t i = 0;
+  for (; i + lanes <= count; i += lanes) {
+    const auto v = hn::Load(d, src + i);
+    hn::Store(hn::Add(v, vc), d, dst + i);
+  }
+  for (; i < count; ++i) {
+    dst[i] = src[i] + static_cast<float>(Constant);
+  }
+}
+
+// Named entry points for HWY_EXPORT
+static HWY_NOINLINE HWY_MAYBE_UNUSED void my_kernel_1(float* d, const float* s, size_t n) {
+  my_kernel_impl<1>(d, s, n);
+}
+
+}  // namespace HWY_NAMESPACE
+}  // namespace mllm_kernel::cpu
+HWY_AFTER_NAMESPACE();
+```
+
+### Step 2: Write the `.cpp` source
+
+Create `mllm_kernel/cpu/csrc/my_kernel.cpp`:
+
+```cpp
+#include 
+#include 
+#include 
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "../csrc/my_kernel.cpp"
+#include <hwy/foreach_target.h>
+
+#include <hwy/highway.h>
+
+#if HWY_ONCE
+#include <hwy/targets.h>
+#endif
+
+namespace mllm_kernel::cpu {
+#if HWY_ONCE
+
+HWY_EXPORT(my_kernel_1);
+
+template <int Constant>
+void my_kernel(tvm::ffi::TensorView dst, tvm::ffi::TensorView src) {
+  using namespace mllm_kernel::host;
+  SymbolicSize N{"num_elements"};
+  SymbolicDevice device_;
+  (void)TensorMatcher({N})
+      .with_dtype<float>()
+      .with_device(device_)
+      .verify(dst)
+      .verify(src);
+  const size_t n = N.unwrap();
+  auto* dst_ptr = static_cast<float*>(dst.data_ptr());
+  const auto* src_ptr = static_cast<const float*>(src.data_ptr());
+  HWY_DYNAMIC_DISPATCH(my_kernel_1)(dst_ptr, src_ptr, n);
+}
+
+// Explicit instantiation
+template void my_kernel<1>(tvm::ffi::TensorView, tvm::ffi::TensorView);
+
+#endif
+}  // namespace mllm_kernel::cpu
+```
+
+### Step 3: Write the Python JIT wrapper
+
+Create `mllm_kernel/cpu/jit/my_kernel.py`:
+
+```python
+import torch
+from mllm_kernel.jit_utils import jit
+
+@jit(
+    args=1,
+    device="cpu",
+    
cpp_files=["my_kernel.cpp"], + cpp_wrappers=[("my_kernel", "mllm_kernel::cpu::my_kernel<1>")], + func_name="my_kernel", +) +def _kernel_1(compiled_module, dst, src): + compiled_module.my_kernel(dst, src) + +def my_kernel(src: torch.Tensor) -> torch.Tensor: + dst = torch.empty_like(src) + _kernel_1(dst, src) + return dst +``` + +**Key CPU differences from CUDA:** + +| Aspect | CUDA | CPU | +|--------|------|-----| +| Source file | `.cuh` in `cuda/csrc/` | `.cpp` + `.hpp` in `cpu/csrc/` and `cpu/include/` | +| Namespace | Anonymous `namespace {}` | `mllm_kernel::cpu` | +| Device check | `with_device` | `with_device` | +| Launch | `LaunchKernel(grid, block, device)(...)` | Direct function call via `HWY_DYNAMIC_DISPATCH` | +| SIMD | CUDA warps | Highway `ScalableTag` | +| Wrapper fields | `cuda_files`, `cuda_wrappers` | `cpp_files`, `cpp_wrappers` | +| Wrapper name | `"MyKernel::run"` | `"mllm_kernel::cpu::my_kernel<1>"` (fully qualified) | + +--- + +## TensorMatcher Reference + +`TensorMatcher` validates shape, dtype, device, and strides of `tvm::ffi::TensorView` arguments. 
+ +```cpp +using namespace mllm_kernel::host; + +// Symbolic dimensions -- bind on first .verify(), check consistency on subsequent calls +SymbolicSize B{"batch"}, N{"seq_len"}, D{"dim"}; +SymbolicSize Stride0{"stride0"}; +SymbolicDType dtype; +SymbolicDevice device; + +// Shape [B, N, D], contiguous, float32, on CUDA +(void)TensorMatcher({B, N, D}) + .with_dtype(dtype) + .with_device(device) + .verify(tensor_a); + +// Shape [B, N, D], same dtype and device (already bound) +(void)TensorMatcher({B, N, D}) + .with_dtype(dtype) + .with_device(device) + .verify(tensor_b); + +// Shape [B, D] with explicit strides (non-contiguous OK) +(void)TensorMatcher({B, D}) + .with_strides({Stride0, 1}) + .with_dtype() + .with_device(device) + .verify(indices); + +// Multiple acceptable dtypes +SymbolicDType flex_dtype; +(void)TensorMatcher({N}) + .with_dtype(flex_dtype) + .with_device(device) + .verify(mixed_tensor); + +// Extract bound values +int64_t batch = B.unwrap(); +int64_t dim = D.unwrap(); +DLDevice dev = device.unwrap(); +``` + +--- + +## LaunchKernel Reference + +```cpp +using namespace mllm_kernel::host; + +// Basic launch (resolves CUDA stream from DLDevice) +DLDevice dev = device.unwrap(); +LaunchKernel(grid_dim, block_dim, dev)(kernel_func, param_struct); + +// With shared memory +LaunchKernel(grid, block, dev, shared_mem_bytes)(kernel, params); + +// With PDL (Programmatic Dependent Launch, sm_90+) +LaunchKernel(grid, block, dev).enable_pdl(true)(kernel, params); +``` + +--- + +## Utility Reference (`mllm_kernel::host`) + +| Function | Description | +|----------|-------------| +| `RuntimeCheck(cond, msg...)` | Throws `PanicError` if `cond` is false | +| `Panic(msg...)` | Always throws (unreachable code) | +| `div_ceil(a, b)` | Integer ceiling division | +| `dtype_bytes(DLDataType)` | Byte size of a DLPack dtype | + +CUDA-only (`mllm_kernel::device`): + +| Symbol | Value | +|--------|-------| +| `kWarpThreads` | 32 | +| `kFullMask` | 0xffffffff | +| `fp16_t` | 
`__half` | +| `bf16_t` | `__nv_bfloat16` | + +--- + +## Testing Pattern + +Create `tests/test_my_kernel.py`: + +```python +import pytest +import torch +from mllm_kernel.cuda.jit.my_kernel import my_kernel + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") +@pytest.mark.parametrize("n", [1, 128, 1024, 65536]) +def test_my_kernel(n): + x = torch.randn(n, dtype=torch.float32, device="cuda") + result = my_kernel(x) + torch.cuda.synchronize() + expected = x * 2.0 + assert torch.allclose(result, expected) +``` + +Run: +```bash +pytest tests/test_my_kernel.py -v +``` + +--- + +## Benchmark Pattern + +Create `benchmarks/bench_my_kernel.py`. Use `torch.profiler.profile` with `ProfilerActivity.CPU` and `ProfilerActivity.CUDA`. Compare the JIT kernel against a naive PyTorch implementation and report speedup. + +Run: +```bash +python benchmarks/bench_my_kernel.py --num-elements 1000000 +``` + +--- + +## Checklist for a New Kernel + +- [ ] `.cuh` / `.cpp` + `.hpp` kernel source created +- [ ] `TensorMatcher` validates all tensor arguments (shape, dtype, device) +- [ ] No host-side dereference of device pointers +- [ ] Python `@jit` wrapper created with correct `cuda_wrappers` or `cpp_wrappers` +- [ ] Public API function added (allocates output, calls internal `_kernel`) +- [ ] Exported in `jit/__init__.py` +- [ ] JIT cache cleared after `.cuh` edits (`rm -rf ~/.cache/mllm_kernel/cuda_*`) +- [ ] Pytest test with `@pytest.mark.parametrize` and PyTorch reference +- [ ] Benchmark with `torch.profiler` (optional but recommended) + +--- + +## Common Pitfalls + +1. **Segfault from dereferencing device pointer on host** -- `tensor.data_ptr()` returns a GPU pointer for CUDA tensors. Never read its contents in host code. Use `TensorMatcher` for validation instead. +2. **Stale JIT cache** -- After editing `.cuh`, delete `~/.cache/mllm_kernel/cuda_*/`. The old `.so` will be reused otherwise. +3. 
**Missing `#include `** -- CPU kernels must include this inside `#if HWY_ONCE` to provide `GetChosenTarget` for the JIT-built module. +4. **`#pragma once` in Highway `.hpp`** -- Highway's `foreach_target` includes the file multiple times for different SIMD targets. `#pragma once` breaks this. +5. **Wrong wrapper name** -- CUDA uses short names (`"MyKernel::run"`); CPU uses fully qualified names (`"mllm_kernel::cpu::my_kernel<1>"`). +6. **Generator device mismatch in tests** -- `torch.randperm` needs a CUDA generator on CUDA; `torch.randint` only accepts CPU generators. Use separate generators. diff --git a/mllm-kernel/benchmarks/bench_create_kv_indices.py b/mllm-kernel/benchmarks/bench_create_kv_indices.py new file mode 100644 index 00000000..f570e66d --- /dev/null +++ b/mllm-kernel/benchmarks/bench_create_kv_indices.py @@ -0,0 +1,218 @@ +"""Benchmark create_kv_indices vs naive torch gather using torch.profiler. + +Example: + python benchmarks/bench_create_kv_indices.py --batch-size 512 --max-reqs 2048 --max-ctx 4096 +""" + +from __future__ import annotations + +import argparse + +import torch +from torch.profiler import ProfilerActivity, profile + +from mllm_kernel.cuda.jit.create_kv_indices import create_kv_indices + + +def _make_batch( + *, + max_reqs: int, + max_ctx: int, + batch_size: int, + use_start_offsets: bool, + device: torch.device, + seed: int, +): + g_cuda = torch.Generator(device=device).manual_seed(seed) + g_cpu = torch.Generator(device="cpu").manual_seed(seed) + + req_to_token = torch.arange( + max_reqs * max_ctx, dtype=torch.int32, device=device + ).reshape(max_reqs, max_ctx) + + assert batch_size <= max_reqs + req_pool_indices = torch.randperm(max_reqs, generator=g_cuda, device=device)[ + :batch_size + ].to(torch.int32) + + page_kernel_lens_list = [] + kv_start_idx_list = [] + for _ in range(batch_size): + L = int(torch.randint(1, max_ctx, (1,), generator=g_cpu).item()) + if use_start_offsets: + start_max = max_ctx - L + start = 
int(torch.randint(0, max(start_max, 1), (1,), generator=g_cpu).item()) + else: + start = 0 + page_kernel_lens_list.append(L) + kv_start_idx_list.append(start) + + page_kernel_lens = torch.tensor( + page_kernel_lens_list, dtype=torch.int32, device=device + ) + kv_start_idx = torch.tensor(kv_start_idx_list, dtype=torch.int32, device=device) + + kv_indptr = torch.empty(batch_size + 1, dtype=torch.int32, device=device) + kv_indptr[0] = 0 + kv_indptr[1:] = torch.cumsum(page_kernel_lens, dim=0) + + kv_indices = torch.empty( + int(kv_indptr[-1].item()), dtype=torch.int32, device=device + ) + + return ( + req_to_token, + req_pool_indices, + page_kernel_lens, + kv_indptr, + kv_start_idx, + kv_indices, + ) + + +def _profile( + name: str, fn, *, warmup: int, iters: int, row_limit: int, trace_path: str | None +): + for _ in range(warmup): + fn() + torch.cuda.synchronize() + + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=False, + profile_memory=False, + with_stack=False, + ) as prof: + for _ in range(iters): + fn() + torch.cuda.synchronize() + + events = prof.key_averages() + time_attr = ( + "self_cuda_time_total" + if events and hasattr(events[0], "self_cuda_time_total") + else "self_device_time_total" + ) + sort_key = ( + "self_cuda_time_total" + if time_attr == "self_cuda_time_total" + else "self_device_time_total" + ) + total_us = sum(float(getattr(evt, time_attr, 0.0)) for evt in events) + avg_us = total_us / max(iters, 1) + + print(f"\n=== {name} ===") + print( + prof.key_averages().table( + sort_by=sort_key, + row_limit=row_limit, + ) + ) + print(f"{name} total self device time: {total_us:.2f} us") + print(f"{name} avg self device time/iter: {avg_us:.2f} us") + + if trace_path: + prof.export_chrome_trace(trace_path) + print(f"{name} trace exported: {trace_path}") + + return avg_us + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Benchmark create_kv_indices vs naive torch gather", + ) + 
parser.add_argument("--batch-size", type=int, default=512) + parser.add_argument("--max-reqs", type=int, default=2048) + parser.add_argument("--max-ctx", type=int, default=4096) + parser.add_argument("--warmup", type=int, default=50) + parser.add_argument("--iters", type=int, default=200) + parser.add_argument("--row-limit", type=int, default=20) + parser.add_argument("--export-trace-dir", type=str, default="") + parser.add_argument("--seed", type=int, default=0) + parser.add_argument( + "--use-start-offsets", + action="store_true", + help="Enable non-zero kv_start_idx to emulate sliding-window decode", + ) + args = parser.parse_args() + + if not torch.cuda.is_available(): + raise RuntimeError("CUDA is required for this benchmark") + + torch.manual_seed(args.seed) + device = torch.device("cuda") + + ( + req_to_token, + req_pool_indices, + page_kernel_lens, + kv_indptr, + kv_start_idx, + kv_indices, + ) = _make_batch( + max_reqs=args.max_reqs, + max_ctx=args.max_ctx, + batch_size=args.batch_size, + use_start_offsets=args.use_start_offsets, + device=device, + seed=args.seed, + ) + + print("=== create_kv_indices profiler benchmark ===") + print( + f"batch_size={args.batch_size}, max_reqs={args.max_reqs}, max_ctx={args.max_ctx}, " + f"use_start_offsets={args.use_start_offsets}" + ) + print(f"warmup={args.warmup}, iters={args.iters}, row_limit={args.row_limit}") + + trace_dir = args.export_trace_dir.strip() + kernel_trace = f"{trace_dir}/create_kv_indices_trace.json" if trace_dir else None + torch_trace = f"{trace_dir}/torch_gather_trace.json" if trace_dir else None + + def _run_kernel_once(): + create_kv_indices( + req_to_token, + req_pool_indices, + page_kernel_lens, + kv_indptr, + kv_start_idx, + kv_indices, + ) + + def _run_torch_once(): + # Torch reference implementation on device: gather per-sequence ranges + # from req_to_token into a flat buffer. 
+ out = [] + for i in range(args.batch_size): + req = req_pool_indices[i].item() + start = kv_start_idx[i].item() if args.use_start_offsets else 0 + L = page_kernel_lens[i].item() + row = req_to_token[req, start : start + L] + out.append(row) + torch.cat(out, out=kv_indices) + + kernel_avg_us = _profile( + "create_kv_indices", + _run_kernel_once, + warmup=args.warmup, + iters=args.iters, + row_limit=args.row_limit, + trace_path=kernel_trace, + ) + + torch_avg_us = _profile( + "torch_reference", + _run_torch_once, + warmup=args.warmup, + iters=args.iters, + row_limit=args.row_limit, + trace_path=torch_trace, + ) + + speedup = torch_avg_us / max(kernel_avg_us, 1e-12) + print(f"\nSpeedup: {speedup:.3f}x") + + +if __name__ == "__main__": + main() diff --git a/mllm-kernel/mllm_kernel/cuda/csrc/create_kv_indices.cuh b/mllm-kernel/mllm_kernel/cuda/csrc/create_kv_indices.cuh new file mode 100644 index 00000000..0b9e4c88 --- /dev/null +++ b/mllm-kernel/mllm_kernel/cuda/csrc/create_kv_indices.cuh @@ -0,0 +1,282 @@ +// High-performance CUDA kernel to build FlashInfer KV index arrays from +// pymllm's ReqToTokenPool mapping table. +// +// This is the CUDA-C equivalent of the Triton kernel +// `_create_kv_indices_triton` previously defined in +// `pymllm/layers/attention/flashinfer_backend.py`. +// +// Motivation +// ---------- +// FlashInfer's paged KV attention API expects a *flat* buffer of KV indices +// (`kv_indices`) together with a prefix-sum pointer array (`kv_indptr`). +// +// * `kv_indices` is a 1-D int32 array that stores, for every token of every +// sequence in a batch, the corresponding *slot index* in the KV cache. +// * `kv_indptr` (length = batch_size + 1) stores prefix sums over the +// per-sequence token counts. 
For sequence `i` we have tokens in: +// +// kv_indices[kv_indptr[i] : kv_indptr[i + 1]] +// +// In pymllm, the mapping from (request_slot, position_in_sequence) to KV slot +// index is stored in a 2-D tensor `req_to_token` owned by `ReqToTokenPool`: +// +// req_to_token[req_slot, position] -> kv_index (int32) +// +// For each batch we also know: +// * which request slots we are serving: `req_pool_indices[bs]` +// * how many tokens to use from each sequence: `page_kernel_lens[bs]` +// * the starting position inside each sequence: `kv_start_idx[bs]` (optional, +// used for sliding-window / partial-context attention) +// +// This kernel converts that 2-D layout into the flat `(kv_indptr, kv_indices)` +// layout in a single, highly parallel CUDA pass: +// +// For each sequence i in the batch: +// - let req = req_pool_indices[i] +// - let len = page_kernel_lens[i] +// - let start = kv_start_idx[i] (or 0 if not provided) +// - let offset = kv_indptr[i] +// - for j in [0, len): +// kv_indices[offset + j] = req_to_token[req, start + j] +// +// Requirements / invariants +// ------------------------- +// * `req_to_token` is int32 (aligned with sglang). +// * All tensors must reside on the same CUDA device. +// * The kernel is designed for extremely high throughput: +// - a block is assigned per sequence (batch element), +// - threads cooperate within the block to copy the token range with +// coalesced loads/stores. +// * Shape and dtype checks are performed at runtime via mllm_kernel's +// TensorMatcher utilities, so misuse is caught with clear error messages. +// +// Integration +// ----------- +// The exported entry point is `CreateKvIndicesKernel::run(...)`. The Python +// wrapper in `mllm_kernel/cuda/jit/create_kv_indices.py` JIT-compiles this +// kernel and exposes a `create_kv_indices(...)` function which is then called +// by `pymllm.layers.attention.flashinfer_backend`. 
+ +#pragma once + +#include // TensorMatcher, SymbolicSize, SymbolicDevice, SymbolicDType +#include // div_ceil, RuntimeCheck, Panic +#include // LaunchKernel + +#include +#include + +#include + +namespace { + +// --------------------------------------------------------------------------- +// Parameter block passed to the CUDA kernel +// --------------------------------------------------------------------------- +// +// We keep this struct trivially-copyable so it can be passed via +// `__grid_constant__` if desired. Each field is carefully documented to make +// the data flow explicit. + +struct CreateKvIndicesParams { + // Pointer to ReqToTokenPool mapping table: + // req_to_token[req_slot, position] -> kv_index (int32) + // shape: [max_reqs, max_context_len] + const int32_t* __restrict__ req_to_token; + + // Request slots participating in this batch. + // shape: [batch_size] + const int32_t* __restrict__ req_pool_indices; + + // Number of tokens to copy for each sequence in the batch. + // shape: [batch_size] + const int32_t* __restrict__ page_kernel_lens; + + // Prefix sums over per-sequence token counts. + // kv_indptr[i] is the starting offset in kv_indices for sequence i. + // shape: [batch_size + 1] + const int32_t* __restrict__ kv_indptr; + + // Optional starting position inside each request's sequence. When nullptr, + // we assume start = 0 for all sequences. When non-null, shape is + // [batch_size]. + const int32_t* __restrict__ kv_start_idx; + + // Output flat KV index buffer (int32). Length must be at least + // kv_indptr[batch_size]. + int32_t* __restrict__ kv_indices; + + // Stride of the first dimension of req_to_token, i.e. the number of + // positions per request (max_context_len). + int32_t req_to_token_stride; + + // Number of sequences in the batch. + uint32_t batch_size; + + // Whether kv_start_idx is valid (1) or should be ignored (0). 
+ uint32_t has_kv_start; +}; + +// We use a fixed block size chosen to balance occupancy and per-sequence +// parallelism. Each block is mapped to a single sequence and threads within +// the block cooperate to copy its token range. +constexpr int kBlockSize = 256; + +// --------------------------------------------------------------------------- +// Core CUDA kernel +// --------------------------------------------------------------------------- +// +// Grid mapping: +// * blockIdx.x -> sequence index `i` in [0, batch_size) +// * threadIdx.x -> intra-sequence worker; threads stride over the token +// range [0, len) with step `blockDim.x`. +// +// This design has several advantages: +// * No inter-block synchronisation is required. +// * Memory accesses are fully coalesced because each thread block walks a +// contiguous segment of the `req_to_token` and `kv_indices` arrays. +// * It handles variable-length sequences naturally; sequences with more +// tokens simply iterate more in the inner loop. + +__global__ void create_kv_indices_kernel(const CreateKvIndicesParams params) { + const uint32_t seq_id = blockIdx.x; // which sequence in the batch + if (seq_id >= params.batch_size) { return; } + + // Resolve the request slot for this sequence. + const int32_t req_slot = params.req_pool_indices[seq_id]; + + // Compute the output range [out_offset, out_offset + len) in kv_indices. + const int32_t out_offset = params.kv_indptr[seq_id]; + const int32_t len = params.page_kernel_lens[seq_id]; + + // Compute the starting position inside the original sequence. + int32_t start = 0; + if (params.has_kv_start && params.kv_start_idx != nullptr) { start = params.kv_start_idx[seq_id]; } + + // Base pointers for this sequence. + const int32_t* __restrict__ row = params.req_to_token + static_cast(req_slot) * params.req_to_token_stride; + int32_t* __restrict__ out = params.kv_indices + out_offset; + + // Each thread in the block handles a strided subset of [0, len). 
+ for (int32_t t = threadIdx.x; t < len; t += blockDim.x) { + // Guard against out-of-bounds reads if (start + t) exceeds the + // configured context length. Under normal conditions upstream + // invariants guarantee `start + len <= req_to_token_stride`, but + // this check makes the kernel robust against misconfigured inputs + // and prevents rare segmentation faults observed during testing. + const int32_t pos = start + t; + if (pos < 0 || pos >= params.req_to_token_stride) { continue; } + + out[t] = row[pos]; + } +} + +// --------------------------------------------------------------------------- +// Host-side launcher used by the JIT wrapper +// --------------------------------------------------------------------------- +// +// `CreateKvIndicesKernel::run(...)` is the C++ entry point that will be bound +// to a TVM FFI function and called from Python via the JIT utility. It is +// responsible for: +// 1. Validating tensor shapes / dtypes / devices. +// 2. Extracting symbolic sizes and strides. +// 3. Building the parameter block. +// 4. Launching the CUDA kernel using mllm_kernel::host::LaunchKernel. + +struct CreateKvIndicesKernel { + static void run(tvm::ffi::TensorView req_to_token, tvm::ffi::TensorView req_pool_indices, + tvm::ffi::TensorView page_kernel_lens, tvm::ffi::TensorView kv_indptr, tvm::ffi::TensorView kv_start_idx, + tvm::ffi::TensorView kv_indices) { + using namespace mllm_kernel::host; + + // --------------------------------------------------------------------- + // 1. 
Validate input tensors + // --------------------------------------------------------------------- + // req_to_token: [max_reqs, max_context_len], int32, CUDA + SymbolicSize MaxReqs{"max_reqs"}; + SymbolicSize MaxCtx{"max_context_len"}; + SymbolicSize ReqStride{"req_stride"}; + SymbolicDType req_dtype; + SymbolicDevice device; + + (void)TensorMatcher({MaxReqs, MaxCtx}) + .with_strides({ReqStride, 1}) + .with_dtype(req_dtype) + .with_device(device) + .verify(req_to_token); + + // req_pool_indices: [B], int32, CUDA + SymbolicSize B{"batch_size"}; + SymbolicSize ReqPoolStride{"req_pool_stride"}; + (void)TensorMatcher({B}).with_strides({ReqPoolStride}).with_dtype().with_device(device).verify(req_pool_indices); + + // page_kernel_lens: [B], int32, same device + SymbolicSize PageStride{"page_stride"}; + (void)TensorMatcher({B}).with_strides({PageStride}).with_dtype().with_device(device).verify(page_kernel_lens); + + // kv_indptr: [Nind], int32, same device (we later require Nind >= B + 1) + SymbolicSize Nind{"indptr_len"}; + (void)TensorMatcher({Nind}).with_dtype().with_device(device).verify(kv_indptr); + + // kv_start_idx: either [B] or [0]; int32, same device + SymbolicSize StartLen{"start_len"}; + SymbolicSize StartStride{"start_stride"}; + (void)TensorMatcher({StartLen}).with_strides({StartStride}).with_dtype().with_device(device).verify(kv_start_idx); + + // kv_indices: [Nidx], int32, same device + SymbolicSize Nidx{"num_indices"}; + (void)TensorMatcher({Nidx}).with_dtype().with_device(device).verify(kv_indices); + + // Extract concrete sizes. + const int64_t batch_size = B.unwrap(); + const int64_t indptr_len = Nind.unwrap(); + const int64_t req_stride = ReqStride.unwrap(); + + // Basic consistency checks. 
+ RuntimeCheck(batch_size > 0, "batch_size must be positive, got ", batch_size); + RuntimeCheck(indptr_len >= batch_size + 1, "kv_indptr length (", indptr_len, ") must be at least batch_size+1 (", + batch_size + 1, ")"); + + // NOTE: We intentionally do NOT read kv_indptr[batch_size] on the host to + // validate that kv_indices is large enough. kv_indptr resides in device + // memory and dereferencing it from host code would be an illegal memory + // access (segfault). Callers are responsible for ensuring that + // kv_indices.numel() >= kv_indptr[batch_size]. + + // kv_start_idx is optional; when StartLen == 0 we treat it as absent. + RuntimeCheck(StartLen.unwrap() == 0 || StartLen.unwrap() == batch_size, + "kv_start_idx must have length 0 or batch_size; got ", StartLen.unwrap(), " vs batch_size=", batch_size); + + const bool has_kv_start = (StartLen.unwrap() == batch_size); + + // --------------------------------------------------------------------- + // 2. Build parameter block + // --------------------------------------------------------------------- + CreateKvIndicesParams params{ + .req_to_token = static_cast(req_to_token.data_ptr()), + .req_pool_indices = static_cast(req_pool_indices.data_ptr()), + .page_kernel_lens = static_cast(page_kernel_lens.data_ptr()), + .kv_indptr = static_cast(kv_indptr.data_ptr()), + .kv_start_idx = has_kv_start ? static_cast(kv_start_idx.data_ptr()) : nullptr, + .kv_indices = static_cast(kv_indices.data_ptr()), + .req_to_token_stride = static_cast(req_stride), + .batch_size = static_cast(batch_size), + .has_kv_start = has_kv_start ? 1u : 0u, + }; + + const DLDevice dl_device = device.unwrap(); + + // --------------------------------------------------------------------- + // 3. Launch the CUDA kernel + // --------------------------------------------------------------------- + // We launch one block per sequence so that each sequence can be processed + // independently with fully coalesced memory accesses. 
The per-thread + // inner loop runs over the token range [0, len) with stride = blockDim.x. + + const int grid_size = static_cast(batch_size); + + LaunchKernel(grid_size, kBlockSize, dl_device)(create_kv_indices_kernel, params); + } +}; + +} // namespace diff --git a/mllm-kernel/mllm_kernel/cuda/csrc/vocab_embedding.cuh b/mllm-kernel/mllm_kernel/cuda/csrc/vocab_embedding.cuh new file mode 100644 index 00000000..e69de29b diff --git a/mllm-kernel/mllm_kernel/cuda/jit/create_kv_indices.py b/mllm-kernel/mllm_kernel/cuda/jit/create_kv_indices.py new file mode 100644 index 00000000..565686a4 --- /dev/null +++ b/mllm-kernel/mllm_kernel/cuda/jit/create_kv_indices.py @@ -0,0 +1,118 @@ +"""High-performance CUDA JIT wrapper for create_kv_indices. + +This module exposes a single function: + + create_kv_indices(req_to_token, req_pool_indices, + page_kernel_lens, kv_indptr, + kv_start_idx, kv_indices) + +which is a Python binding around the C++/CUDA kernel defined in +`mllm_kernel/cuda/csrc/create_kv_indices.cuh`. + +The kernel transforms pymllm's 2-D ReqToTokenPool mapping table into the flat +`(kv_indptr, kv_indices)` layout expected by FlashInfer's paged KV attention +wrappers. It is carefully written for maximum throughput and is intended to +replace the Triton implementation `_create_kv_indices_triton` in +`pymllm.layers.attention.flashinfer_backend`. +""" + +from __future__ import annotations + +import torch + +from mllm_kernel.jit_utils import cache_once, jit + + +@cache_once +def _make_create_kv_indices_kernel(): + """JIT-compile the CUDA kernel and return a callable wrapper. + + The JIT system will: + * locate `create_kv_indices.cuh` under the mllm-kernel CUDA csrc tree, + * compile it into a TVM FFI module, + * expose `CreateKvIndicesKernel::run` as `compiled_module.create_kv_indices`. 
+ """ + + @jit( + args=[], + device="cuda", + cuda_files=["create_kv_indices.cuh"], + cpp_wrappers=[], + cuda_wrappers=[ + ("create_kv_indices", "CreateKvIndicesKernel::run"), + ], + func_name="create_kv_indices", + ) + def _kernel( + compiled_module, + req_to_token: torch.Tensor, + req_pool_indices: torch.Tensor, + page_kernel_lens: torch.Tensor, + kv_indptr: torch.Tensor, + kv_start_idx: torch.Tensor, + kv_indices: torch.Tensor, + ) -> None: + compiled_module.create_kv_indices( + req_to_token, + req_pool_indices, + page_kernel_lens, + kv_indptr, + kv_start_idx, + kv_indices, + ) + + return _kernel + + +def create_kv_indices( + req_to_token: torch.Tensor, + req_pool_indices: torch.Tensor, + page_kernel_lens: torch.Tensor, + kv_indptr: torch.Tensor, + kv_start_idx: torch.Tensor | None, + kv_indices: torch.Tensor, +) -> None: + """Fill a flat KV-index buffer from the ReqToTokenPool mapping. + + This is a thin Python wrapper that forwards to the JIT-compiled CUDA + kernel. All tensors must be placed on the same CUDA device. + + Args + ---- + req_to_token: + Mapping tensor from ReqToTokenPool, shape + ``[max_reqs, max_context_len]``, dtype ``torch.int32``. + req_pool_indices: + Request slots participating in this batch, shape ``[batch_size]``, + dtype ``torch.int32``. + page_kernel_lens: + Per-sequence token counts (how many tokens to attend), shape + ``[batch_size]``, dtype ``torch.int32``. + kv_indptr: + Prefix sums over per-sequence token counts, shape ``[batch_size + 1]``, + dtype ``torch.int32``. ``kv_indptr[i]`` is the starting offset in + ``kv_indices`` for sequence ``i``. + kv_start_idx: + Optional starting positions inside each sequence, shape + ``[batch_size]`` or ``[0]``, dtype ``torch.int32``. When + ``None``, the kernel assumes 0 for all sequences. + kv_indices: + Output flat KV-index buffer, shape ``[N]``, dtype ``torch.int32``. + ``N`` must be at least ``kv_indptr[batch_size]``. 
+ """ + if kv_start_idx is None: + # Use an empty tensor to signal "no start offsets". The C++ launcher + # treats length==0 as "no kv_start" and will pass a nullptr into the + # parameter block, which is slightly cheaper than materialising a + # full zero tensor on every call. + kv_start_idx = req_pool_indices.new_empty(0, dtype=torch.int32) + + kernel = _make_create_kv_indices_kernel() + kernel( + req_to_token, + req_pool_indices, + page_kernel_lens, + kv_indptr, + kv_start_idx, + kv_indices, + ) diff --git a/mllm-kernel/pyproject.toml b/mllm-kernel/pyproject.toml index 77340b29..13147f06 100644 --- a/mllm-kernel/pyproject.toml +++ b/mllm-kernel/pyproject.toml @@ -55,7 +55,7 @@ logging.level = "INFO" # Wheel configuration - include the Python package wheel.packages = ["mllm_kernel"] -wheel.install-dir = "mllm_kernel" +wheel.install-dir = "" # Install directories for cmake targets wheel.cmake = true diff --git a/mllm-kernel/tests/test_create_kv_indices.py b/mllm-kernel/tests/test_create_kv_indices.py new file mode 100644 index 00000000..e8bf770a --- /dev/null +++ b/mllm-kernel/tests/test_create_kv_indices.py @@ -0,0 +1,191 @@ +from __future__ import annotations + +import pytest +import torch + +from mllm_kernel.cuda.jit.create_kv_indices import create_kv_indices + + +def _make_batch( + *, + max_reqs: int, + max_ctx: int, + batch_size: int, + use_start_offsets: bool, + seed: int = 0, +): + """Construct a random-but-bounded test batch for create_kv_indices. + + The constraints ensure that for every sequence i: + 0 <= kv_start_idx[i] + 0 < page_kernel_lens[i] + kv_start_idx[i] + page_kernel_lens[i] <= max_ctx + so the kernel never reads beyond the ReqToTokenPool row. + """ + # Use a CUDA generator for randperm (which requires matching device) + # and a separate CPU generator for randint (which only accepts CPU). 
+ g_cuda = torch.Generator(device="cuda").manual_seed(seed) + g_cpu = torch.Generator(device="cpu").manual_seed(seed) + + device = "cuda" + # req_to_token[req_slot, position] -> kv_index (here we simply use a + # monotonically increasing pattern so correctness is easy to check). + req_to_token = torch.arange( + max_reqs * max_ctx, dtype=torch.int32, device=device + ).reshape(max_reqs, max_ctx) + + # Sample distinct request slots for the batch. + assert batch_size <= max_reqs + req_pool_indices = torch.randperm(max_reqs, generator=g_cuda, device=device)[ + :batch_size + ].to(torch.int32) + + # For each sequence choose a valid (start, length) pair. + page_kernel_lens_list = [] + kv_start_idx_list = [] + for _ in range(batch_size): + # ensure at least 1 token per sequence + L = int(torch.randint(1, max_ctx, (1,), generator=g_cpu).item()) + if use_start_offsets: + start_max = max_ctx - L + start = int(torch.randint(0, max(start_max, 1), (1,), generator=g_cpu).item()) + else: + start = 0 + page_kernel_lens_list.append(L) + kv_start_idx_list.append(start) + + page_kernel_lens = torch.tensor( + page_kernel_lens_list, dtype=torch.int32, device=device + ) + kv_start_idx = torch.tensor(kv_start_idx_list, dtype=torch.int32, device=device) + + # Build kv_indptr prefix sums. 
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required")
@pytest.mark.parametrize("use_start_offsets", [False, True])
@pytest.mark.parametrize(
    "batch_size,max_reqs,max_ctx",
    [
        (1, 4, 16),  # minimal batch
        (4, 8, 64),  # small batch
        (32, 64, 512),  # medium batch, longer context
        (128, 256, 2048),  # larger batch, stress inner loop
    ],
)
def test_create_kv_indices_matches_reference(
    use_start_offsets: bool,
    batch_size: int,
    max_reqs: int,
    max_ctx: int,
):
    """CUDA kernel output must equal a naive CPU gather over the same inputs.

    The reference walks (request_slot, start, length) explicitly on CPU and
    concatenates the row slices; the kernel must produce an identical flat
    ``kv_indices`` buffer.
    """
    (
        req_to_token,
        req_pool_indices,
        page_kernel_lens,
        kv_indptr,
        kv_start_idx,
        kv_indices,
    ) = _make_batch(
        max_reqs=max_reqs,
        max_ctx=max_ctx,
        batch_size=batch_size,
        use_start_offsets=use_start_offsets,
        seed=2026,
    )

    # Pass None when offsets are disabled so that code path is exercised too.
    create_kv_indices(
        req_to_token,
        req_pool_indices,
        page_kernel_lens,
        kv_indptr,
        kv_start_idx if use_start_offsets else None,
        kv_indices,
    )
    torch.cuda.synchronize()

    # Naive reference computed on CPU.
    table = req_to_token.cpu()
    slots = req_pool_indices.cpu().to(torch.long)
    lens = page_kernel_lens.cpu()
    starts = kv_start_idx.cpu()

    segments = []
    for seq in range(batch_size):
        begin = starts[seq].item() if use_start_offsets else 0
        count = lens[seq].item()
        segments.append(table[slots[seq].item(), begin : begin + count])
    expected = torch.cat(segments, dim=0)

    assert kv_indices.shape == expected.shape
    assert torch.equal(kv_indices.cpu(), expected)
+ expected = req_to_token[:, 0] + assert torch.equal(kv_indices, expected) + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required") +def test_oversized_output_buffer(): + """kv_indices buffer is larger than needed (prefill path uses +256 padding).""" + device = "cuda" + bs = 4 + max_ctx = 64 + req_to_token = torch.arange(bs * max_ctx, dtype=torch.int32, device=device).reshape(bs, max_ctx) + req_pool_indices = torch.arange(bs, dtype=torch.int32, device=device) + page_kernel_lens = torch.full((bs,), 10, dtype=torch.int32, device=device) + kv_indptr = torch.arange(0, bs * 10 + 1, 10, dtype=torch.int32, device=device) + # Allocate with extra padding, like the prefill path does. + kv_indices = torch.full((bs * 10 + 256,), -1, dtype=torch.int32, device=device) + + create_kv_indices(req_to_token, req_pool_indices, page_kernel_lens, kv_indptr, None, kv_indices) + torch.cuda.synchronize() + + # First bs*10 entries should match; padding should remain -1. + ref_segments = [] + for i in range(bs): + ref_segments.append(req_to_token[i, :10]) + ref = torch.cat(ref_segments, dim=0) + assert torch.equal(kv_indices[:bs * 10], ref) + assert torch.all(kv_indices[bs * 10:] == -1) diff --git a/pymllm/configs/server_config.py b/pymllm/configs/server_config.py index 9e399d62..f6a2090f 100644 --- a/pymllm/configs/server_config.py +++ b/pymllm/configs/server_config.py @@ -79,6 +79,39 @@ class ServerConfig: # Feature switches # --------------------------------------------------------------------- # enable_shared_queue: bool = False # Use shared memory queue for fast IPC + + # CUDA IPC transport for multimodal GPU tensors. + # Requires enable_shared_queue=True to take effect. + # + # Three transport modes (mutually exclusive for GPU tensors): + # + # "default" + # GPU tensors are moved to CPU first (GPU→CPU copy), then placed in + # POSIX shared memory via share_memory_(). Safe but adds a device copy. + # + # "cuda_ipc" + # GPU tensors stay on GPU. 
Each tensor is wrapped in a + # TransportProxyTensor whose __getstate__ calls storage._share_cuda_() + # to obtain an IPC handle; the receiver reconstructs via + # UntypedStorage._new_shared_cuda(*handle). Simple, but the underlying + # GPU allocation is never freed until the sender process exits + # (PyTorch limitation) -- can leak GPU memory in long-running services. + # + # "cuda_ipc_pool" [recommended for production] + # GPU tensors are copied into a pre-allocated fixed-size GPU workspace + # (MmItemMemoryPool). Each outgoing tensor occupies a "chunk" of the + # pool; the chunk's IPC handle is sent via CudaIpcTensorTransportProxy. + # After the receiver finishes copying data it increments a shared-memory + # sync flag; a background recycler thread in the sender watches these + # flags and returns chunks to the available pool. No GPU memory is leaked. + tensor_transport_mode: str = "default" # one of: default, cuda_ipc, cuda_ipc_pool + + # Size of the pre-allocated CUDA IPC memory pool in MB. + # Only used when tensor_transport_mode == "cuda_ipc_pool". + cuda_ipc_pool_size_mb: int = 512 + + # How often (seconds) the pool recycler thread wakes up. + cuda_ipc_recycle_interval: float = 0.1 # enable_lora: bool = False # max_loaded_loras: Optional[int] = None # max_loras_per_batch: int = 8 @@ -102,6 +135,18 @@ def __post_init__(self) -> None: self._validate() def _validate(self) -> None: + valid_modes = {"default", "cuda_ipc", "cuda_ipc_pool"} + if self.tensor_transport_mode not in valid_modes: + raise ValueError( + f"`tensor_transport_mode` must be one of {valid_modes}, " + f"got {self.tensor_transport_mode!r}." + ) + if self.tensor_transport_mode != "default" and not self.enable_shared_queue: + raise ValueError( + "`tensor_transport_mode` != 'default' requires `enable_shared_queue=True`." 
class ForwardMode(IntEnum):
    """Kind of forward pass being performed.

    Covers standard prefill / decode inference; there is no speculative
    decoding, encoder-decoder, or distributed-attention variant here.
    """

    # Prefill / extend: process new tokens. Any shared prefix (e.g. a radix-
    # cached system prompt) already has its KV cache populated.
    EXTEND = auto()

    # Decode: generate exactly one new token per sequence.
    DECODE = auto()

    # Mixed: a chunked-prefill batch containing both extend and decode
    # sequences at once.
    MIXED = auto()

    # Idle: nothing to process (data-parallel ranks with no sequences).
    IDLE = auto()

    # ---- helpers ----

    def is_extend(self) -> bool:
        """True for EXTEND or MIXED (i.e. any prefill-style pass)."""
        return self is ForwardMode.EXTEND or self is ForwardMode.MIXED

    def is_prefill(self) -> bool:
        """Alias for ``is_extend()``."""
        return self.is_extend()

    def is_decode(self) -> bool:
        return self is ForwardMode.DECODE

    def is_mixed(self) -> bool:
        return self is ForwardMode.MIXED

    def is_idle(self) -> bool:
        return self is ForwardMode.IDLE

    def is_decode_or_idle(self) -> bool:
        return self is ForwardMode.DECODE or self is ForwardMode.IDLE


# ---------------------------------------------------------------------------
# ForwardBatch
# ---------------------------------------------------------------------------


@dataclass
class ForwardBatch:
    """Every tensor a single model forward pass needs.

    Required (positional) fields describe the batch itself; the optional
    fields are metadata attached by the model runner / attention backend
    before ``model.forward`` is called.

    Parameters
    ----------
    forward_mode
        EXTEND / DECODE / MIXED / IDLE for this pass.
    batch_size
        Number of sequences in the batch.
    input_ids
        Token ids for every position, shape ``[num_tokens]``. For decode
        ``num_tokens == batch_size``; for extend it equals
        ``extend_num_tokens``.
    req_pool_indices
        Each sequence's index in ``ReqToTokenPool``, shape ``[batch_size]``
        (int32 or int64, on the target device).
    seq_lens
        Total (prefix + new) length per sequence, shape ``[batch_size]``
        (int32).
    out_cache_loc
        KV-pool slot each *output* token is written to, shape
        ``[num_tokens]`` (int64).
    seq_lens_sum
        Python ``int`` equal to ``seq_lens.sum()``; cached so backends can
        avoid repeated device-to-host syncs.
    seq_lens_cpu
        Optional CPU copy of ``seq_lens`` (lets backends plan without a
        device sync).
    positions
        Token position per input token, shape ``[num_tokens]``
        (int32 or int64).
    extend_num_tokens, extend_seq_lens, extend_prefix_lens, extend_start_loc,
    extend_prefix_lens_cpu, extend_seq_lens_cpu
        Extend/prefill-only metadata: total new-token count, per-sequence new
        token counts, cached-prefix lengths, cumulative start offsets in the
        flattened extend stream, and CPU list mirrors of the latter two.
    return_logprob
        Whether to compute per-token log-probabilities.
    top_logprobs_nums
        Number of top log-probs to return per sequence (None or list of ints).
    req_to_token_pool, token_to_kv_pool
        References to the memory pools (set by the model runner).
    attn_backend
        Attention backend to use (set by the model runner before
        ``model.forward``).
    """

    # ---- required fields (positional) ----
    forward_mode: ForwardMode
    batch_size: int
    input_ids: torch.Tensor  # [num_tokens]
    req_pool_indices: torch.Tensor  # [batch_size] int32/int64
    seq_lens: torch.Tensor  # [batch_size] int32
    out_cache_loc: torch.Tensor  # [num_tokens] int64
    seq_lens_sum: int  # python int

    # ---- optional metadata ----

    # CPU mirror of seq_lens
    seq_lens_cpu: Optional[torch.Tensor] = None

    # Position encoding – shape [num_tokens], int32 or int64
    positions: Optional[torch.Tensor] = None

    # ---- extend / prefill specific ----
    extend_num_tokens: Optional[int] = None
    extend_seq_lens: Optional[torch.Tensor] = None  # [batch_size] int32
    extend_prefix_lens: Optional[torch.Tensor] = None  # [batch_size] int32
    extend_start_loc: Optional[torch.Tensor] = None  # [batch_size] int32
    extend_prefix_lens_cpu: Optional[List[int]] = None
    extend_seq_lens_cpu: Optional[List[int]] = None

    # ---- logprob options ----
    return_logprob: bool = False
    top_logprobs_nums: Optional[List[int]] = None

    # ---- memory pools (set by model runner) ----
    req_to_token_pool: Optional["ReqToTokenPool"] = None
    token_to_kv_pool: Optional["KVPool"] = None

    # ---- attention backend (set by model runner) ----
    attn_backend: Optional["AttentionBackend"] = None
cfg = get_global_config() enable_shared_queue = cfg.server.enable_shared_queue - - # Create shared queue if enabled + transport_mode: str = ( + cfg.server.tensor_transport_mode + ) # "default" | "cuda_ipc" | "cuda_ipc_pool" + + # Create shared queue if enabled. + # Note: the MmItemMemoryPool (for "cuda_ipc_pool") is created *inside* + # the tokenizer subprocess after CUDA is initialised. The queue here + # is constructed without a pool; TokenizerProcess._ensure_pool() will + # swap in a pool-aware queue at runtime. shared_queue = None if enable_shared_queue: - # TODO: WCH init CUDA IPC things. - shared_queue = TensorQueue(maxsize=1000) # Configurable max size - logger.info("Shared memory queue enabled for fast IPC") + from pymllm.orchestrator.shared_memory_queue import TensorQueue as _TQ + + # Construct with the configured transport mode. The pool is not + # supplied here; it will be lazily initialised inside the subprocess. + shared_queue = _TQ( + maxsize=1000, + transport_mode=transport_mode, + pool=None, # pool initialised lazily inside TokenizerProcess + ) + logger.info( + "Shared memory queue enabled for fast IPC (transport_mode=%s)", + transport_mode, + ) tokenizer_cfg: Dict[str, Any] = { "tokenizer_path": str(cfg.server.tokenizer_path), @@ -95,6 +111,9 @@ def _launch_processes(self) -> None: "context_length": cfg.server.context_length, "hf_config": cfg.model.hf_config, "enable_shared_queue": enable_shared_queue, + "tensor_transport_mode": transport_mode, + "cuda_ipc_pool_size_mb": cfg.server.cuda_ipc_pool_size_mb, + "cuda_ipc_recycle_interval": cfg.server.cuda_ipc_recycle_interval, } # Tokenizer @@ -124,6 +143,7 @@ def _launch_processes(self) -> None: scheduler_writer, shared_queue, # Pass shared queue enable_shared_queue, # Pass flag + transport_mode, # Pass tensor transport mode ), daemon=True, ) diff --git a/pymllm/layers/attention/__init__.py b/pymllm/layers/attention/__init__.py index e69de29b..5d0dbf07 100644 --- a/pymllm/layers/attention/__init__.py 
+++ b/pymllm/layers/attention/__init__.py @@ -0,0 +1,25 @@ +"""Attention layers and backends for pymllm.""" + +from pymllm.layers.attention.attention_backend import AttentionBackend +from pymllm.layers.attention.flashinfer_backend import ( + DecodeMetadata, + FlashInferAttnBackend, + PrefillMetadata, + WrapperDispatch, + should_use_tensor_core, +) +from pymllm.layers.attention.radix_attention import AttentionType, RadixAttention + +__all__ = [ + # Base + "AttentionBackend", + # RadixAttention + "AttentionType", + "RadixAttention", + # FlashInfer backend + "FlashInferAttnBackend", + "DecodeMetadata", + "PrefillMetadata", + "WrapperDispatch", + "should_use_tensor_core", +] diff --git a/pymllm/layers/attention/attention_backend.py b/pymllm/layers/attention/attention_backend.py index e69de29b..07e2f6a1 100644 --- a/pymllm/layers/attention/attention_backend.py +++ b/pymllm/layers/attention/attention_backend.py @@ -0,0 +1,143 @@ +"""Abstract base class for pymllm attention backends. + +Every concrete backend (FlashInfer, Triton, torch-native, …) must implement +at minimum: + + * ``init_forward_metadata`` – called once per batch before the model forward. + * ``forward_extend`` – prefill / extend attention. + * ``forward_decode`` – single-token decode attention. + +The public ``forward`` method dispatches to the correct variant based on +``forward_batch.forward_mode``. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Optional + +import torch + +if TYPE_CHECKING: + from pymllm.engine.forward_batch import ForwardBatch, ForwardMode + from pymllm.layers.attention.radix_attention import RadixAttention + + +class AttentionBackend(ABC): + """Abstract base class for attention backends. + + All concrete backends inherit from this class and implement the abstract + methods below. 
+ """ + + # ------------------------------------------------------------------ + # Core interface – must be implemented by every backend + # ------------------------------------------------------------------ + + @abstractmethod + def init_forward_metadata(self, forward_batch: "ForwardBatch") -> None: + """Prepare per-batch metadata before the model's attention layers run. + + For FlashInfer this plans the KV-index arrays and calls + ``wrapper.begin_forward``; for Triton / torch-native this is a no-op. + Must be called once per batch *before* ``model.forward``. + """ + raise NotImplementedError + + @abstractmethod + def forward_decode( + self, + q: torch.Tensor, + k: Optional[torch.Tensor], + v: Optional[torch.Tensor], + layer: "RadixAttention", + forward_batch: "ForwardBatch", + save_kv_cache: bool = True, + **kwargs, + ) -> torch.Tensor: + """Run attention for a decode step (one new token per sequence).""" + raise NotImplementedError + + @abstractmethod + def forward_extend( + self, + q: torch.Tensor, + k: Optional[torch.Tensor], + v: Optional[torch.Tensor], + layer: "RadixAttention", + forward_batch: "ForwardBatch", + save_kv_cache: bool = True, + **kwargs, + ) -> torch.Tensor: + """Run attention for a prefill / extend step.""" + raise NotImplementedError + + # ------------------------------------------------------------------ + # Dispatch – shared logic; do not override in normal backends + # ------------------------------------------------------------------ + + def forward( + self, + q: torch.Tensor, + k: Optional[torch.Tensor], + v: Optional[torch.Tensor], + layer: "RadixAttention", + forward_batch: "ForwardBatch", + save_kv_cache: bool = True, + **kwargs, + ) -> torch.Tensor: + """Dispatch to ``forward_decode`` or ``forward_extend`` based on mode. + + For IDLE batches a zero-filled output tensor is returned without any + compute. + """ + if forward_batch.forward_mode.is_idle(): + # Return empty output without computation. 
+ return q.new_empty(q.shape[0], layer.tp_q_head_num * layer.v_head_dim) + elif forward_batch.forward_mode.is_decode(): + return self.forward_decode( + q, k, v, layer, forward_batch, save_kv_cache=save_kv_cache, **kwargs + ) + else: + return self.forward_extend( + q, k, v, layer, forward_batch, save_kv_cache=save_kv_cache, **kwargs + ) + + # ------------------------------------------------------------------ + # Optional CUDA-graph interface + # ------------------------------------------------------------------ + + def get_cuda_graph_seq_len_fill_value(self) -> int: + """Fill value used to pad ``seq_lens`` tensors for CUDA-graph capture. + + Most backends use ``1`` (not ``0``) to avoid division-by-zero in + attention kernels. + """ + raise NotImplementedError + + def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int) -> None: + """Allocate shared CUDA-graph state (buffers reused across captures).""" + raise NotImplementedError + + def init_forward_metadata_capture_cuda_graph( + self, + bs: int, + num_tokens: int, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + forward_mode: "ForwardMode", + ) -> None: + """Set up per-batch metadata for capturing a CUDA graph.""" + raise NotImplementedError + + def init_forward_metadata_replay_cuda_graph( + self, + bs: int, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + seq_lens_sum: int, + forward_mode: "ForwardMode", + seq_lens_cpu: Optional[torch.Tensor], + ) -> None: + """Update metadata when replaying a captured CUDA graph.""" + raise NotImplementedError diff --git a/pymllm/layers/attention/flashinfer_backend.py b/pymllm/layers/attention/flashinfer_backend.py index e69de29b..479fb5ce 100644 --- a/pymllm/layers/attention/flashinfer_backend.py +++ b/pymllm/layers/attention/flashinfer_backend.py @@ -0,0 +1,964 @@ +"""FlashInfer attention backend for pymllm. + + * No model-runner object -- constructor takes explicit scalar / tensor params. 
+ * No tensor-parallelism head splitting (handled at the model layer level). + * No speculative decoding support. + * ``KVPool`` API: + - ``get_kv_buffer(layer_id)`` returns ``(k_buf, v_buf)`` each shaped + ``[buf_len, num_heads, head_dim]``. + - ``set_kv_buffer(layer_id, indices, k, v)`` -- no scale arguments. + +Supports: + * Single-wrapper mode (full context, no sliding window) + * Sliding-window mode (two wrappers: window + full) + * CUDA-graph capture / replay for decode and target-verify passes. +""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass +from enum import Enum, auto +from typing import List, Optional, Union + +import torch + +from pymllm.engine.forward_batch import ForwardBatch, ForwardMode +from pymllm.layers.attention.attention_backend import AttentionBackend +from mllm_kernel.cuda.jit.create_kv_indices import create_kv_indices + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Optional FlashInfer import +# --------------------------------------------------------------------------- + +_flashinfer_available = False +try: + from flashinfer import ( + BatchDecodeWithPagedKVCacheWrapper, + BatchPrefillWithPagedKVCacheWrapper, + BatchPrefillWithRaggedKVCacheWrapper, + ) + + try: + from flashinfer import fast_decode_plan + from functools import partial as _partial + + _has_fast_decode_plan = True + except ImportError: + _has_fast_decode_plan = False + + from flashinfer.cascade import merge_state + + _flashinfer_available = True +except ImportError: + logger.warning( + "flashinfer is not installed; FlashInferAttnBackend will raise " + "NotImplementedError if used." 
+ ) + +# --------------------------------------------------------------------------- +# Global workspace buffer (shared across all FlashInfer wrapper instances) +# --------------------------------------------------------------------------- + +_global_workspace_buffer: Optional[torch.Tensor] = None + +# Default workspace size (128 MB); can be overridden via environment variable. +_DEFAULT_WORKSPACE_BYTES = int( + os.environ.get("PYMLLM_FLASHINFER_WORKSPACE_SIZE", 128 * 1024 * 1024) +) + +# --------------------------------------------------------------------------- +# Enums / dataclasses +# --------------------------------------------------------------------------- + + +class WrapperDispatch(Enum): + """Indicates which wrapper to use for a given attention layer.""" + + SLIDING_WINDOW = auto() + CROSS_ATTENTION = auto() + + +@dataclass +class DecodeMetadata: + """Per-batch metadata for a decode step.""" + + decode_wrappers: "List[BatchDecodeWithPagedKVCacheWrapper]" + + +@dataclass +class PrefillMetadata: + """Per-batch metadata for a prefill / extend step.""" + + prefill_wrappers: "List[BatchPrefillWithPagedKVCacheWrapper]" + use_ragged: bool + extend_no_prefix: bool + + +# --------------------------------------------------------------------------- +# CUDA kernel – build the flat kv_indices array for FlashInfer +# --------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# Helper – choose whether to use tensor cores for decode +# --------------------------------------------------------------------------- + + +def should_use_tensor_core( + kv_cache_dtype: torch.dtype, + num_attention_heads: int, + num_kv_heads: int, +) -> bool: + """Return whether FlashInfer decode should use tensor cores. + + For FP8 we always use tensor cores. 
For fp16 / bf16 we use them when + the GQA group size (num_attention_heads / num_kv_heads) is ≥ 4, which + fuses the head group with the token dimension in the MMA instruction. + """ + env_override = os.environ.get("PYMLLM_FLASHINFER_USE_TENSOR_CORE") + if env_override is not None: + return env_override.lower() == "true" + + try: + from flashinfer.decode import _grouped_size_compiled_for_decode_kernels + + return not _grouped_size_compiled_for_decode_kernels( + num_attention_heads, num_kv_heads + ) + except (ImportError, AttributeError): + pass + + gqa_group_size = num_attention_heads // num_kv_heads + if kv_cache_dtype in (torch.float8_e4m3fn, torch.float8_e5m2): + return True + if kv_cache_dtype in (torch.float16, torch.half, torch.bfloat16): + return gqa_group_size >= 4 + return False + + +# --------------------------------------------------------------------------- +# FlashInferAttnBackend +# --------------------------------------------------------------------------- + + +class FlashInferAttnBackend(AttentionBackend): + """FlashInfer-based attention backend for pymllm. + + This class does not depend on a ``ModelRunner`` object. Instead it takes + all required configuration explicitly so that it can be constructed + independently of any particular model runner. + + Parameters + ---------- + num_heads + Number of query heads per device (after any TP sharding). + num_kv_heads + Number of KV heads per device. + head_dim + Per-head dimension for Q and K. + kv_cache_dtype + ``torch.dtype`` of the KV cache (e.g. ``torch.float16``). + q_dtype + ``torch.dtype`` of the query tensor. + max_context_len + Maximum sequence length the model supports. + req_to_token + The ``[max_reqs, max_context_len]`` int32 tensor from + ``ReqToTokenPool.req_to_token``. + device + Target device (e.g. ``torch.device("cuda")``) + max_req_pool_size + Maximum number of concurrent requests (= ``ReqToTokenPool.size``). + Used to pre-allocate ``kv_indptr`` / ``kv_last_page_len`` buffers. 
+ sliding_window_size + When not ``None``, enables sliding-window attention mode which + allocates two wrapper sets (window + full context). + skip_prefill + When ``True``, skip creating prefill wrappers (for backends that only + perform decode, e.g. multi-step draft backends). + kv_indptr_buf + Optional pre-allocated ``kv_indptr`` buffer. Used when sharing + buffers across multiple backend instances (e.g. multi-step draft). + kv_last_page_len_buf + Optional pre-allocated ``kv_last_page_len`` buffer. + init_new_workspace + When ``True`` allocate a fresh workspace buffer instead of reusing the + global one. + """ + + def __init__( + self, + num_heads: int, + num_kv_heads: int, + head_dim: int, + kv_cache_dtype: torch.dtype, + q_dtype: torch.dtype, + max_context_len: int, + req_to_token: torch.Tensor, + device: torch.device, + max_req_pool_size: int, + sliding_window_size: Optional[int] = None, + skip_prefill: bool = False, + kv_indptr_buf: Optional[torch.Tensor] = None, + kv_last_page_len_buf: Optional[torch.Tensor] = None, + init_new_workspace: bool = False, + ): + if not _flashinfer_available: + raise RuntimeError( + "flashinfer is required for FlashInferAttnBackend but is not " + "installed. 
Run: pip install flashinfer-python" + ) + + super().__init__() + + self.num_heads = num_heads + self.num_kv_heads = num_kv_heads + self.head_dim = head_dim + self.kv_cache_dtype = kv_cache_dtype + self.q_dtype = q_dtype + self.max_context_len = max_context_len + self.req_to_token = req_to_token + self.device = device + self.skip_prefill = skip_prefill + + # Tensor-core preference for decode + self.decode_use_tensor_cores = should_use_tensor_core( + kv_cache_dtype, num_heads, num_kv_heads + ) + + # Sliding-window / cross-attention wrapper dispatch + if sliding_window_size is not None: + self.num_wrappers = 2 + self.dispatch_reason: Optional[WrapperDispatch] = ( + WrapperDispatch.SLIDING_WINDOW + ) + self.sliding_window_size: Optional[int] = sliding_window_size + else: + self.num_wrappers = 1 + self.dispatch_reason = None + self.sliding_window_size = None + + # ------------------------------------------------------------------ + # Workspace buffer + # ------------------------------------------------------------------ + global _global_workspace_buffer + if _global_workspace_buffer is None: + _global_workspace_buffer = torch.empty( + _DEFAULT_WORKSPACE_BYTES, + dtype=torch.uint8, + device=device, + ) + if init_new_workspace: + self.workspace_buffer = torch.empty( + _DEFAULT_WORKSPACE_BYTES, + dtype=torch.uint8, + device=device, + ) + else: + self.workspace_buffer = _global_workspace_buffer + + # ------------------------------------------------------------------ + # kv_indptr [num_wrappers × (max_req_pool_size + 1)] + # kv_last_page_len [max_req_pool_size] + # ------------------------------------------------------------------ + if kv_indptr_buf is None: + self.kv_indptr: List[torch.Tensor] = [ + torch.zeros((max_req_pool_size + 1,), dtype=torch.int32, device=device) + for _ in range(self.num_wrappers) + ] + else: + assert self.num_wrappers == 1 + self.kv_indptr = [kv_indptr_buf] + + if kv_last_page_len_buf is None: + self.kv_last_page_len = torch.ones( + 
(max_req_pool_size,), dtype=torch.int32, device=device + ) + else: + assert self.num_wrappers == 1 + self.kv_last_page_len = kv_last_page_len_buf + + # qo_indptr – only needed for prefill + if not skip_prefill: + self.qo_indptr: List[torch.Tensor] = [ + torch.zeros((max_req_pool_size + 1,), dtype=torch.int32, device=device) + for _ in range(self.num_wrappers) + ] + + # ------------------------------------------------------------------ + # Create FlashInfer wrappers + # ------------------------------------------------------------------ + self.prefill_wrapper_ragged: Optional[ + "BatchPrefillWithRaggedKVCacheWrapper" + ] = None + self.prefill_wrappers_paged: List["BatchPrefillWithPagedKVCacheWrapper"] = [] + self.decode_wrappers: List["BatchDecodeWithPagedKVCacheWrapper"] = [] + + if not skip_prefill: + self.prefill_wrapper_ragged = BatchPrefillWithRaggedKVCacheWrapper( + self.workspace_buffer, "NHD" + ) + + for _ in range(self.num_wrappers): + if not skip_prefill: + self.prefill_wrappers_paged.append( + BatchPrefillWithPagedKVCacheWrapper(self.workspace_buffer, "NHD") + ) + self.decode_wrappers.append( + BatchDecodeWithPagedKVCacheWrapper( + self.workspace_buffer, + "NHD", + use_tensor_cores=self.decode_use_tensor_cores, + ) + ) + + # ------------------------------------------------------------------ + # Indices updaters + # ------------------------------------------------------------------ + if not skip_prefill: + self.indices_updater_prefill = _FlashInferIndicesUpdaterPrefill(self) + self.indices_updater_decode = _FlashInferIndicesUpdaterDecode(self) + + # Per-batch metadata set by init_forward_metadata + self.forward_metadata: Optional[Union[DecodeMetadata, PrefillMetadata]] = None + + # CUDA-graph metadata stores + self.decode_cuda_graph_metadata: dict = {} + self.prefill_cuda_graph_metadata: dict = {} + + # ------------------------------------------------------------------ + # init_forward_metadata + # 
------------------------------------------------------------------ + + def init_forward_metadata(self, forward_batch: ForwardBatch) -> None: + """Prepare FlashInfer wrappers for the current batch. + + Must be called once per batch before the model's ``forward`` method. + """ + if forward_batch.forward_mode.is_decode_or_idle(): + self.indices_updater_decode.update( + forward_batch.req_pool_indices, + forward_batch.seq_lens, + forward_batch.seq_lens_cpu, + forward_batch.seq_lens_sum, + decode_wrappers=self.decode_wrappers, + ) + self.forward_metadata = DecodeMetadata(self.decode_wrappers) + else: + # Extend / prefill + prefix_lens = forward_batch.extend_prefix_lens + extend_no_prefix = ( + forward_batch.extend_prefix_lens_cpu is not None + and not any(forward_batch.extend_prefix_lens_cpu) + ) + use_ragged = extend_no_prefix + + self.indices_updater_prefill.update( + forward_batch.req_pool_indices, + forward_batch.seq_lens, + forward_batch.seq_lens_cpu, + forward_batch.seq_lens_sum, + prefix_lens=prefix_lens, + prefill_wrappers=self.prefill_wrappers_paged, + use_ragged=use_ragged, + ) + self.forward_metadata = PrefillMetadata( + self.prefill_wrappers_paged, + use_ragged=use_ragged, + extend_no_prefix=extend_no_prefix, + ) + + # ------------------------------------------------------------------ + # forward_extend + # ------------------------------------------------------------------ + + def forward_extend( + self, + q: torch.Tensor, + k: Optional[torch.Tensor], + v: Optional[torch.Tensor], + layer: "RadixAttention", # noqa: F821 + forward_batch: ForwardBatch, + save_kv_cache: bool = True, + **kwargs, + ) -> torch.Tensor: + from pymllm.layers.attention.radix_attention import RadixAttention + + assert isinstance(layer, RadixAttention) + meta: PrefillMetadata = self.forward_metadata + + prefill_wrapper_paged = meta.prefill_wrappers[self._get_wrapper_idx(layer)] + cache_loc = forward_batch.out_cache_loc + + # Write K/V into the pool + if k is not None: + assert v is not 
None + if save_kv_cache: + forward_batch.token_to_kv_pool.set_kv_buffer( + layer.layer_id, cache_loc, k, v + ) + + q_3d = q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim) + + if not meta.use_ragged: + # Paged-only path: uses the full KV cache (prefix + extend). + k_cache, v_cache = forward_batch.token_to_kv_pool.get_kv_buffer( + layer.layer_id + ) + # Reshape to [buf_len, page_size=1, num_heads, head_dim] for FlashInfer. + paged_kv = (k_cache.unsqueeze(1), v_cache.unsqueeze(1)) + + o = prefill_wrapper_paged.forward( + q_3d, + paged_kv, + causal=not layer.is_cross_attention, + sm_scale=layer.scaling, + window_left=layer.sliding_window_size, + logits_soft_cap=layer.logit_cap if layer.logit_cap > 0 else None, + ) + else: + # Ragged path: query attends only to the new (ragged) K/V; + # prefix K/V is in the paged pool. + if k is None: + # Fallback: load K/V from the pool. + k_buf, v_buf = forward_batch.token_to_kv_pool.get_kv_buffer( + layer.layer_id + ) + k = k_buf + v = v_buf + + k_3d = k.view(-1, layer.tp_k_head_num, layer.head_dim) + v_3d = v.view(-1, layer.tp_v_head_num, layer.v_head_dim) + + if meta.extend_no_prefix: + # Pure prefill – no prefix at all. + o = self.prefill_wrapper_ragged.forward( + q_3d, + k_3d, + v_3d, + causal=True, + sm_scale=layer.scaling, + logits_soft_cap=(layer.logit_cap if layer.logit_cap > 0 else None), + ) + else: + # Extend with prefix: merge ragged (new) and paged (prefix). 
+ o1, s1 = self.prefill_wrapper_ragged.forward_return_lse( + q_3d, + k_3d, + v_3d, + causal=True, + sm_scale=layer.scaling, + logits_soft_cap=(layer.logit_cap if layer.logit_cap > 0 else None), + ) + + k_cache, v_cache = forward_batch.token_to_kv_pool.get_kv_buffer( + layer.layer_id + ) + paged_kv = (k_cache.unsqueeze(1), v_cache.unsqueeze(1)) + o2, s2 = prefill_wrapper_paged.forward_return_lse( + q_3d, + paged_kv, + causal=False, + sm_scale=layer.scaling, + logits_soft_cap=(layer.logit_cap if layer.logit_cap > 0 else None), + ) + + o, _ = merge_state(o1, s1, o2, s2) + + return o.view(-1, layer.tp_q_head_num * layer.v_head_dim) + + # ------------------------------------------------------------------ + # forward_decode + # ------------------------------------------------------------------ + + def forward_decode( + self, + q: torch.Tensor, + k: Optional[torch.Tensor], + v: Optional[torch.Tensor], + layer: "RadixAttention", # noqa: F821 + forward_batch: ForwardBatch, + save_kv_cache: bool = True, + **kwargs, + ) -> torch.Tensor: + from pymllm.layers.attention.radix_attention import RadixAttention + + assert isinstance(layer, RadixAttention) + meta: DecodeMetadata = self.forward_metadata + + decode_wrapper = meta.decode_wrappers[self._get_wrapper_idx(layer)] + cache_loc = forward_batch.out_cache_loc + + if k is not None: + assert v is not None + if save_kv_cache: + forward_batch.token_to_kv_pool.set_kv_buffer( + layer.layer_id, cache_loc, k, v + ) + + k_cache, v_cache = forward_batch.token_to_kv_pool.get_kv_buffer(layer.layer_id) + paged_kv = (k_cache.unsqueeze(1), v_cache.unsqueeze(1)) + + o = decode_wrapper.forward( + q.contiguous().view(-1, layer.tp_q_head_num, layer.head_dim), + paged_kv, + sm_scale=layer.scaling, + logits_soft_cap=layer.logit_cap if layer.logit_cap > 0 else None, + ) + + return o.view(-1, layer.tp_q_head_num * layer.v_head_dim) + + # ------------------------------------------------------------------ + # CUDA-graph support + # 
------------------------------------------------------------------ + + def get_cuda_graph_seq_len_fill_value(self) -> int: + return 1 + + def init_cuda_graph_state( + self, + max_bs: int, + max_num_tokens: int, + kv_indices_buf: Optional[torch.Tensor] = None, + ) -> None: + """Allocate CUDA-graph shared state buffers.""" + if kv_indices_buf is None: + cuda_graph_kv_indices = torch.zeros( + (max_num_tokens * self.max_context_len,), + dtype=torch.int32, + device=self.device, + ) + else: + cuda_graph_kv_indices = kv_indices_buf + + self.cuda_graph_kv_indices = [cuda_graph_kv_indices] + [ + cuda_graph_kv_indices.clone() for _ in range(self.num_wrappers - 1) + ] + + if not self.skip_prefill: + self.cuda_graph_custom_mask = torch.zeros( + (max_num_tokens * self.max_context_len,), + dtype=torch.uint8, + device=self.device, + ) + self.cuda_graph_qk_indptr = [x.clone() for x in self.kv_indptr] + self.cuda_graph_qo_indptr = [x.clone() for x in self.kv_indptr] + + def init_forward_metadata_capture_cuda_graph( + self, + bs: int, + num_tokens: int, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + forward_mode: ForwardMode, + ) -> None: + """Set up metadata for CUDA-graph capture of a decode step.""" + if not forward_mode.is_decode_or_idle(): + raise ValueError( + "CUDA-graph capture is only supported for decode / idle modes." 
+ ) + + decode_wrappers = [] + for i in range(self.num_wrappers): + decode_wrappers.append( + BatchDecodeWithPagedKVCacheWrapper( + self.workspace_buffer, + "NHD", + use_cuda_graph=True, + use_tensor_cores=self.decode_use_tensor_cores, + paged_kv_indptr_buffer=self.kv_indptr[i][: num_tokens + 1], + paged_kv_indices_buffer=self.cuda_graph_kv_indices[i], + paged_kv_last_page_len_buffer=self.kv_last_page_len[:num_tokens], + ) + ) + + seq_lens_sum = seq_lens.sum().item() + self.indices_updater_decode.update( + req_pool_indices, + seq_lens, + seq_lens.cpu(), + seq_lens_sum, + decode_wrappers=decode_wrappers, + ) + self.decode_cuda_graph_metadata[bs] = decode_wrappers + self.forward_metadata = DecodeMetadata(decode_wrappers) + + if _has_fast_decode_plan: + for i in range(self.num_wrappers): + decode_wrappers[i].begin_forward = _partial( + fast_decode_plan, decode_wrappers[i] + ) + + def init_forward_metadata_replay_cuda_graph( + self, + bs: int, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + seq_lens_sum: int, + forward_mode: ForwardMode, + seq_lens_cpu: Optional[torch.Tensor], + ) -> None: + """Update metadata when replaying a CUDA graph for decode.""" + if not forward_mode.is_decode_or_idle(): + raise ValueError( + "CUDA-graph replay is only supported for decode / idle modes." + ) + + self.indices_updater_decode.update( + req_pool_indices[:bs], + seq_lens[:bs], + seq_lens_cpu[:bs] if seq_lens_cpu is not None else None, + seq_lens_sum, + decode_wrappers=self.decode_cuda_graph_metadata[bs], + ) + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _get_wrapper_idx(self, layer) -> int: + """Return the wrapper index for the given attention layer.""" + if self.num_wrappers == 1: + return 0 + if self.dispatch_reason == WrapperDispatch.SLIDING_WINDOW: + # Wrapper 0 → sliding window attention. + # Wrapper 1 → full-context attention. 
+ return int(layer.sliding_window_size == -1) + raise ValueError(f"Unknown dispatch reason: {self.dispatch_reason}") + + +# --------------------------------------------------------------------------- +# _FlashInferIndicesUpdaterDecode +# --------------------------------------------------------------------------- + + +class _FlashInferIndicesUpdaterDecode: + """Populates ``kv_indptr`` / ``kv_indices`` and calls + ``wrapper.begin_forward`` before every decode step. + """ + + def __init__(self, backend: FlashInferAttnBackend): + self.num_qo_heads = backend.num_heads + self.num_kv_heads = backend.num_kv_heads + self.head_dim = backend.head_dim + self.data_type = backend.kv_cache_dtype + self.q_data_type = backend.q_dtype + self.sliding_window_size = backend.sliding_window_size + self.backend = backend + + self.kv_indptr = backend.kv_indptr + self.kv_last_page_len = backend.kv_last_page_len + self.req_to_token = backend.req_to_token + + def update( + self, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + seq_lens_cpu: Optional[torch.Tensor], + seq_lens_sum: int, + decode_wrappers: "List[BatchDecodeWithPagedKVCacheWrapper]", + kv_start_idx: Optional[torch.Tensor] = None, + ) -> None: + if self.backend.dispatch_reason == WrapperDispatch.SLIDING_WINDOW: + self._update_sliding_window( + req_pool_indices, + seq_lens, + seq_lens_cpu, + seq_lens_sum, + decode_wrappers, + ) + else: + # Single-wrapper: full-context decode. Build kv_indptr/kv_indices + # and call FlashInfer's plan function via the CUDA kernel. + bs = len(req_pool_indices) + kv_indptr = self.kv_indptr[0] + + # Fill kv_indptr: prefix sums of paged_kernel_lens. + kv_indptr[1 : bs + 1] = torch.cumsum(seq_lens, dim=0) + kv_indptr_sliced = kv_indptr[: bs + 1] + + if seq_lens_cpu is not None: + seq_lens_sum = int(seq_lens_cpu.sum().item()) + else: + seq_lens_sum = int(seq_lens.sum().item()) + + # Allocate KV indices buffer. 
+            if decode_wrappers and decode_wrappers[0].is_cuda_graph_enabled:
+                kv_indices = decode_wrappers[0]._paged_kv_indices_buf
+            else:
+                kv_indices = torch.empty(
+                    seq_lens_sum, dtype=torch.int32, device=self.req_to_token.device
+                )
+
+            # Use high-performance CUDA kernel to populate kv_indices.
+            create_kv_indices(
+                self.req_to_token,
+                req_pool_indices.to(torch.int32),
+                seq_lens.to(torch.int32),
+                kv_indptr_sliced,
+                None,
+                kv_indices,
+            )
+
+            decode_wrappers = decode_wrappers or self.backend.decode_wrappers
+            decode_wrappers[0].begin_forward(
+                kv_indptr_sliced,
+                kv_indices,
+                self.kv_last_page_len[:bs],
+                self.num_qo_heads,
+                self.num_kv_heads,
+                self.head_dim,
+                1,
+                data_type=self.data_type,
+                q_data_type=self.q_data_type,
+                non_blocking=True,
+            )
+
+    def _update_sliding_window(
+        self,
+        req_pool_indices: torch.Tensor,
+        seq_lens: torch.Tensor,
+        seq_lens_cpu: Optional[torch.Tensor],
+        seq_lens_sum: int,
+        decode_wrappers: "List[BatchDecodeWithPagedKVCacheWrapper]",
+    ) -> None:
+        assert self.sliding_window_size is not None
+        for wrapper_id in range(2):
+            if wrapper_id == 0:
+                # Sliding-window attention: clamp to window size + 1
+                paged_kernel_lens = torch.clamp(
+                    seq_lens, max=self.sliding_window_size + 1
+                )
+                paged_kernel_lens_sum = int(paged_kernel_lens.sum().item())
+                kv_start_idx = seq_lens - paged_kernel_lens
+                seq_lens_cpu_tmp = (
+                    torch.clamp(seq_lens_cpu, max=self.sliding_window_size + 1)
+                    if seq_lens_cpu is not None
+                    else None
+                )
+            else:
+                # Full-context attention
+                paged_kernel_lens = seq_lens
+                paged_kernel_lens_sum = seq_lens_sum
+                kv_start_idx = None
+                seq_lens_cpu_tmp = seq_lens_cpu
+
+            bs = len(req_pool_indices)
+            kv_indptr = self.kv_indptr[wrapper_id]
+            kv_indptr[1 : bs + 1] = torch.cumsum(paged_kernel_lens, dim=0)
+            kv_indptr_sliced = kv_indptr[: bs + 1]
+
+            if decode_wrappers and decode_wrappers[wrapper_id].is_cuda_graph_enabled:
+                kv_indices = decode_wrappers[wrapper_id]._paged_kv_indices_buf
+            else:
+                kv_indices = torch.empty(
paged_kernel_lens_sum, + dtype=torch.int32, + device=self.req_to_token.device, + ) + + # High-performance CUDA kernel populates kv_indices from req_to_token. + create_kv_indices( + self.req_to_token, + req_pool_indices.to(torch.int32), + paged_kernel_lens.to(torch.int32), + kv_indptr_sliced, + kv_start_idx.to(torch.int32) if kv_start_idx is not None else None, + kv_indices, + ) + + decode_wrappers[wrapper_id].begin_forward( + kv_indptr_sliced, + kv_indices, + self.kv_last_page_len[:bs], + self.num_qo_heads, + self.num_kv_heads, + self.head_dim, + 1, + data_type=self.data_type, + q_data_type=self.q_data_type, + non_blocking=True, + ) + + +# --------------------------------------------------------------------------- +# _FlashInferIndicesUpdaterPrefill +# --------------------------------------------------------------------------- + + +class _FlashInferIndicesUpdaterPrefill: + """Populates indices and calls ``wrapper.begin_forward`` before extend.""" + + def __init__(self, backend: FlashInferAttnBackend): + self.num_qo_heads = backend.num_heads + self.num_kv_heads = backend.num_kv_heads + self.head_dim = backend.head_dim + self.data_type = backend.kv_cache_dtype + self.q_data_type = backend.q_dtype + self.sliding_window_size = backend.sliding_window_size + self.backend = backend + + self.kv_indptr = backend.kv_indptr + self.kv_last_page_len = backend.kv_last_page_len + self.qo_indptr = backend.qo_indptr + self.req_to_token = backend.req_to_token + self.prefill_wrapper_ragged = backend.prefill_wrapper_ragged + + def update( + self, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + seq_lens_cpu: Optional[torch.Tensor], + seq_lens_sum: int, + prefix_lens: Optional[torch.Tensor], + prefill_wrappers: "List[BatchPrefillWithPagedKVCacheWrapper]", + use_ragged: bool, + ) -> None: + if self.backend.dispatch_reason == WrapperDispatch.SLIDING_WINDOW: + self._update_sliding_window( + req_pool_indices, + seq_lens, + seq_lens_cpu, + seq_lens_sum, + prefix_lens, + 
prefill_wrappers, + use_ragged, + ) + else: + if use_ragged: + paged_kernel_lens = prefix_lens + paged_kernel_lens_sum = paged_kernel_lens.sum().item() + else: + paged_kernel_lens = seq_lens + paged_kernel_lens_sum = seq_lens_sum + + self._call_begin_forward( + self.prefill_wrapper_ragged, + prefill_wrappers[0], + req_pool_indices, + paged_kernel_lens, + paged_kernel_lens_sum, + seq_lens, + prefix_lens, + kv_start_idx=None, + kv_indptr=self.kv_indptr[0], + qo_indptr=self.qo_indptr[0], + use_ragged=use_ragged, + ) + + def _update_sliding_window( + self, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + seq_lens_cpu: Optional[torch.Tensor], + seq_lens_sum: int, + prefix_lens: Optional[torch.Tensor], + prefill_wrappers: "List[BatchPrefillWithPagedKVCacheWrapper]", + use_ragged: bool, + ) -> None: + assert self.sliding_window_size is not None + for wrapper_id in range(2): + if wrapper_id == 0: + # Sliding-window portion uses a limited context window. + extend_lens = seq_lens - prefix_lens + paged_kernel_lens = torch.minimum( + seq_lens, + torch.tensor(self.sliding_window_size, device=seq_lens.device) + + extend_lens, + ) + paged_kernel_lens_sum = int(paged_kernel_lens.sum().item()) + kv_start_idx = seq_lens - paged_kernel_lens + else: + # Full-context portion. 
+ paged_kernel_lens = seq_lens + paged_kernel_lens_sum = seq_lens_sum + kv_start_idx = None + + kv_indptr = self.kv_indptr[wrapper_id] + qo_indptr = self.qo_indptr[wrapper_id] + + self._call_begin_forward( + self.prefill_wrapper_ragged, + prefill_wrappers[wrapper_id], + req_pool_indices, + paged_kernel_lens, + paged_kernel_lens_sum, + seq_lens, + prefix_lens, + kv_start_idx=kv_start_idx, + kv_indptr=kv_indptr, + qo_indptr=qo_indptr, + use_ragged=use_ragged, + ) + + def _call_begin_forward( + self, + wrapper_ragged: "BatchPrefillWithRaggedKVCacheWrapper", + wrapper_paged: "BatchPrefillWithPagedKVCacheWrapper", + req_pool_indices: torch.Tensor, + paged_kernel_lens: torch.Tensor, + paged_kernel_lens_sum: int, + seq_lens: torch.Tensor, + prefix_lens: Optional[torch.Tensor], + kv_start_idx: Optional[torch.Tensor], + kv_indptr: torch.Tensor, + qo_indptr: torch.Tensor, + use_ragged: bool, + ) -> None: + bs = len(seq_lens) + + # Build kv_indptr and kv_indices using the CUDA kernel. + kv_indptr_sliced = kv_indptr[: bs + 1] + kv_indptr_sliced[1:] = torch.cumsum(paged_kernel_lens, dim=0) + + kv_indices = torch.empty( + paged_kernel_lens_sum + 256, + dtype=torch.int32, + device=req_pool_indices.device, + ) + + create_kv_indices( + self.req_to_token, + req_pool_indices.to(torch.int32), + paged_kernel_lens.to(torch.int32), + kv_indptr_sliced, + kv_start_idx.to(torch.int32) if kv_start_idx is not None else None, + kv_indices, + ) + + # Build qo_indptr (number of new tokens per sequence). + if prefix_lens is not None: + extend_lens = seq_lens - prefix_lens + else: + extend_lens = seq_lens + qo_indptr_sliced = qo_indptr[: bs + 1] + qo_indptr_sliced[1:] = torch.cumsum(extend_lens, dim=0) + + # Plan the ragged wrapper (new tokens only). + if use_ragged: + wrapper_ragged.begin_forward( + qo_indptr_sliced, + qo_indptr_sliced, + self.num_qo_heads, + self.num_kv_heads, + self.head_dim, + q_data_type=self.q_data_type, + ) + + # Plan the paged wrapper (cached prefix tokens). 
+ wrapper_paged.begin_forward( + qo_indptr_sliced, + kv_indptr_sliced, + kv_indices, + self.kv_last_page_len[:bs], + self.num_qo_heads, + self.num_kv_heads, + self.head_dim, + 1, + q_data_type=self.q_data_type, + kv_data_type=self.data_type, + non_blocking=True, + ) diff --git a/pymllm/layers/attention/radix_attention.py b/pymllm/layers/attention/radix_attention.py index e69de29b..114130db 100644 --- a/pymllm/layers/attention/radix_attention.py +++ b/pymllm/layers/attention/radix_attention.py @@ -0,0 +1,171 @@ +"""RadixAttention -- the attention layer used by pymllm models. + +This module is kept small intentionally: all heavy computation is delegated +to the pluggable ``AttentionBackend`` that is attached to the ``ForwardBatch``. +""" + +from __future__ import annotations + +from enum import Enum +from typing import TYPE_CHECKING, Optional + +import torch +from torch import nn + +if TYPE_CHECKING: + from pymllm.engine.forward_batch import ForwardBatch + + +# --------------------------------------------------------------------------- +# AttentionType +# --------------------------------------------------------------------------- + + +class AttentionType(Enum): + """Attention variant used by a :class:`RadixAttention` layer. + + Uses string values so that ``torch.compile`` can treat them as constants. + """ + + # Standard causal self-attention in a decoder layer. + DECODER = "decoder" + + # Bidirectional self-attention for image tokens inside a decoder + # (e.g. VLM visual encoder embedded in the language model). + DECODER_BIDIRECTIONAL = "decoder_bidirectional" + + # Full bidirectional self-attention in an encoder-only model. + ENCODER_ONLY = "encoder_only" + + +# --------------------------------------------------------------------------- +# RadixAttention +# --------------------------------------------------------------------------- + + +class RadixAttention(nn.Module): + """Attention layer that delegates computation to a pluggable backend. 
+ + Each transformer attention layer in a pymllm model creates exactly one + ``RadixAttention`` with a unique ``layer_id``. During the forward pass + the layer looks up the correct KV buffer via ``layer_id`` and calls the + backend attached to the current :class:`~pymllm.engine.forward_batch.ForwardBatch`. + + Parameters + ---------- + num_heads + Number of query attention heads (after any tensor-parallelism + sharding; pass the full count if not using TP). + head_dim + Per-head dimension for query and key projections. + scaling + Softmax pre-scale, typically ``1 / sqrt(head_dim)``. + num_kv_heads + Number of key / value heads (supports GQA / MQA). + layer_id + Zero-based index of this layer within the model. Used to index into + ``KVPool.k_buffer`` / ``v_buffer``. + logit_cap + If > 0, attention logits are soft-capped to this value via a ``tanh`` + gate (used by Gemma2 / Gemma3 style models). Set to ``0.0`` to + disable. + v_head_dim + Per-head dimension of the value projection. Defaults to ``head_dim`` + (i.e. standard square QKV). + sliding_window_size + Sliding-window attention span. ``-1`` means full context (no window). + is_cross_attention + ``True`` for cross-attention layers in encoder-decoder models. + attn_type + One of :class:`AttentionType`. 
+ """ + + def __init__( + self, + num_heads: int, + head_dim: int, + scaling: float, + num_kv_heads: int, + layer_id: int, + logit_cap: float = 0.0, + v_head_dim: int = -1, + sliding_window_size: int = -1, + is_cross_attention: bool = False, + attn_type: AttentionType = AttentionType.DECODER, + ): + super().__init__() + + self.tp_q_head_num: int = num_heads + self.tp_k_head_num: int = num_kv_heads + self.tp_v_head_num: int = num_kv_heads + + self.head_dim: int = head_dim + self.qk_head_dim: int = head_dim + self.v_head_dim: int = v_head_dim if v_head_dim != -1 else head_dim + + self.scaling: float = scaling + self.layer_id: int = layer_id + self.logit_cap: float = logit_cap + self.sliding_window_size: int = ( + sliding_window_size if sliding_window_size is not None else -1 + ) + self.is_cross_attention: bool = is_cross_attention + self.attn_type: AttentionType = attn_type + + # ------------------------------------------------------------------ + # forward + # ------------------------------------------------------------------ + + def forward( + self, + q: torch.Tensor, + k: Optional[torch.Tensor], + v: Optional[torch.Tensor], + forward_batch: "ForwardBatch", + save_kv_cache: bool = True, + **kwargs, + ) -> torch.Tensor: + """Run attention for one batch. + + Parameters + ---------- + q + Query tensor, shape ``[num_tokens, tp_q_head_num * head_dim]`` + (or already reshaped to ``[num_tokens, tp_q_head_num, head_dim]``). + k + Key tensor, same leading dimension as ``q``, shape + ``[num_tokens, tp_k_head_num * qk_head_dim]``. + Pass ``None`` for cross-layer KV sharing (``v`` must also be + ``None`` in this case). + v + Value tensor, shape + ``[num_tokens, tp_v_head_num * v_head_dim]``. + forward_batch + Batch metadata and references to memory pools / backend. + save_kv_cache + When ``False``, skip writing K/V into the pool (useful for draft + models in speculative decoding). + **kwargs + Passed through to the backend (e.g. ``q_rope``, ``k_rope``). 
+ """ + if k is not None: + assert v is not None, "k and v must both be provided or both be None" + k = k.view(-1, self.tp_k_head_num, self.qk_head_dim) + v = v.view(-1, self.tp_v_head_num, self.v_head_dim) + + return forward_batch.attn_backend.forward( + q, k, v, self, forward_batch, save_kv_cache, **kwargs + ) + + def extra_repr(self) -> str: + return ( + f"layer_id={self.layer_id}, " + f"q_heads={self.tp_q_head_num}, " + f"kv_heads={self.tp_k_head_num}, " + f"head_dim={self.head_dim}, " + f"v_head_dim={self.v_head_dim}, " + f"scaling={self.scaling:.4f}, " + f"logit_cap={self.logit_cap}, " + f"sliding_window={self.sliding_window_size}, " + f"attn_type={self.attn_type.value}" + ) diff --git a/pymllm/layers/sampling.py b/pymllm/layers/sampling.py new file mode 100644 index 00000000..e69de29b diff --git a/pymllm/mem_cache/memory_pool.py b/pymllm/mem_cache/memory_pool.py index 0721fd71..f9c176a9 100644 --- a/pymllm/mem_cache/memory_pool.py +++ b/pymllm/mem_cache/memory_pool.py @@ -6,7 +6,7 @@ TokenToKVPoolAllocator manages a free-list of integer indices KVPool holds the actual GPU K/V tensors -All indices are **int64** tensors on the target device. Slot 0 in the KV +All indices are **int32** tensors on the target device. Slot 0 in the KV buffers is reserved as a padding / dummy-output slot and is never allocated. """ @@ -210,7 +210,7 @@ class TokenToKVPoolAllocator: allocator = TokenToKVPoolAllocator(size=4096, device="cuda") # --- basic alloc / free --- - indices = allocator.alloc(128) # 128 free slot indices (int64) + indices = allocator.alloc(128) # 128 free slot indices (int32) allocator.free(indices[:64]) # return 64 slots # --- batch free (amortised) --- @@ -251,14 +251,14 @@ def clear(self) -> None: """Reset the allocator so that all slots ``[1, size]`` are free. 
The first slot is reserved for padding.""" if self.page_size == 1: self.free_slots = torch.arange( - 1, self.size + 1, dtype=torch.int64, device=self.device + 1, self.size + 1, dtype=torch.int32, device=self.device ) else: num_pages = self.size // self.page_size self.free_slots = torch.arange( - 1, num_pages + 1, dtype=torch.int64, device=self.device + 1, num_pages + 1, dtype=torch.int32, device=self.device ) - self.release_slots = torch.empty((0,), dtype=torch.int64, device=self.device) + self.release_slots = torch.empty((0,), dtype=torch.int32, device=self.device) self._is_not_in_free_group = True self._free_group: List[torch.Tensor] = [] @@ -273,7 +273,7 @@ def merge_and_sort_free(self) -> None: self.free_slots = torch.cat((self.free_slots, self.release_slots)) if self.need_sort: self.free_slots, _ = torch.sort(self.free_slots) - self.release_slots = torch.empty((0,), dtype=torch.int64, device=self.device) + self.release_slots = torch.empty((0,), dtype=torch.int32, device=self.device) def free_group_begin(self) -> None: """Start collecting ``free()`` calls; actual release is deferred to ``free_group_end``.""" @@ -290,7 +290,7 @@ def free_group_end(self) -> None: def alloc(self, need_size: int) -> Optional[torch.Tensor]: """Allocate *need_size* token indices. - Returns a 1-D ``int64`` tensor on success, or ``None`` if the pool is + Returns a 1-D ``int32`` tensor on success, or ``None`` if the pool is exhausted. 
""" if self.page_size == 1: @@ -380,7 +380,7 @@ def __init__( self.device = torch.device(device) self.req_to_token = torch.zeros( - (max_reqs, max_context_len), dtype=torch.int64, device=self.device + (max_reqs, max_context_len), dtype=torch.int32, device=self.device ) self._free_slots: List[int] = list(range(max_reqs)) diff --git a/pymllm/orchestrator/cuda_ipc_transport.py b/pymllm/orchestrator/cuda_ipc_transport.py index 7052f0e8..938132c8 100644 --- a/pymllm/orchestrator/cuda_ipc_transport.py +++ b/pymllm/orchestrator/cuda_ipc_transport.py @@ -1,373 +1,648 @@ """ -CUDA IPC Transport for zero-copy tensor sharing between processes. +CUDA IPC Transport for zero-copy GPU tensor sharing between processes. -This module implements CUDA IPC with workspace buffer management -to avoid PyTorch's memory leak issue when sharing IPC handles. +## Background -1. Create a workspace buffer on GPU (pre-allocated memory pool) -2. Copy tensor data to a chunk in the workspace -3. Get CUDA IPC handle for the chunk -4. Send handle + metadata (shape, dtype, offset) to another process -5. Reconstruct tensor in target process from IPC handle -6. Copy to local tensor and mark chunk as reusable +When sharing CUDA tensors between processes, there are two fundamentally different paths: -Key Problem Solved: - PyTorch never releases tensors whose IPC handles are shared until process ends. - Solution: Use a fixed-size workspace buffer and recycle chunks. +1. **CPU shared memory path** (``enable_shared_queue=True, enable_cuda_ipc=False``): + GPU tensors are moved to CPU / POSIX shared memory via ``tensor.share_memory_()``. + This is safe but incurs a GPU→CPU copy which is expensive for large vision features. + +2. **CUDA IPC path** (``enable_cuda_ipc=True``): + GPU tensors stay on GPU. PyTorch's ``storage._share_cuda_()`` yields a serialisable + IPC handle; the receiver calls ``UntypedStorage._new_shared_cuda(*handle)`` to map + the same physical GPU memory without any copy. 
+ +These two paths are **mutually exclusive for GPU tensors**. ``enable_cuda_ipc`` takes +priority; when active the CPU-copy step in ``TensorQueue._make_tensors_shareable`` is +skipped. + +## CUDA IPC memory-leak problem and its fix + +PyTorch never releases the GPU allocation backing an IPC-exported tensor until the +*sending* process exits. If we export raw model tensors we permanently leak GPU memory. + +**Solution** (pool-based recycling via ``MmItemMemoryPool``): + +* Allocate a single, fixed-size GPU workspace (``MmItemMemoryPool``). +* For each outgoing GPU tensor, copy it into a chunk of the workspace and export the + *chunk* via IPC (the workspace is never freed; its chunks are recycled). +* After the receiving process has finished with the data it writes a sync flag + (``ShmSyncBuffer``) to signal that the chunk may be reused. +* A background recycler thread in the sender walks ``occupied_chunks`` and returns + chunks whose sync flag has been incremented back to ``available_chunks``. + +## Transport modes + +``TensorTransportMode``: +* ``"default"`` – CPU/shared-memory path; no CUDA IPC. +* ``"cuda_ipc"`` – Simple CUDA IPC: wraps GPU tensors in ``TransportProxyTensor`` + (a ``torch.Tensor`` subclass whose ``__getstate__``/``__setstate__`` use + ``_share_cuda_``). Suitable for single-process-group scenarios; incurs the + PyTorch memory-leak noted above. +* ``"cuda_ipc_pool"`` – Pool-based CUDA IPC: copies GPU tensors into a pre-allocated + ``MmItemMemoryPool`` and wraps the slice in ``CudaIpcTensorTransportProxy``. + The pool is recycled, so there is no memory leak. 
""" +from __future__ import annotations + +import fcntl import logging -import struct -import uuid -from dataclasses import dataclass -from multiprocessing import Queue -from multiprocessing.shared_memory import SharedMemory -from typing import Any, Dict, List, Optional, Tuple +import threading +import time +from multiprocessing import shared_memory +from typing import Any, Dict, List, Literal, Optional, Tuple +import numpy as np import torch -import torch.cuda as cuda logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Type alias for transport mode +# --------------------------------------------------------------------------- + +TensorTransportMode = Literal["default", "cuda_ipc", "cuda_ipc_pool"] -@dataclass -class MemoryChunk: - """Represents a chunk in the workspace buffer.""" - offset: int # Offset in bytes from workspace start - size: int # Size in bytes - in_use: bool # Whether the chunk is currently occupied - sync_shm_name: Optional[str] = None # Shared memory name for sync flag +# --------------------------------------------------------------------------- +# ShmSyncBuffer – a tiny POSIX shared memory float used as a sync counter +# --------------------------------------------------------------------------- -class WorkspaceBuffer: - """GPU memory pool for storing multimodal tensors temporarily. +class ShmSyncBuffer: + """A single float32 in POSIX shared memory used as a sync counter. - This prevents the PyTorch IPC handle memory leak by using a fixed-size - pre-allocated buffer and recycling chunks. + The sender resets it to 0 before exporting a chunk. The receiver + increments it (atomically under a file lock) once it has finished copying + data out of the chunk. When the value reaches the number of consumers + (``tp_size``) the sender recycles the chunk. """ - def __init__(self, size_gb: float = 4.0, device: int = 0): - """Initialize workspace buffer. 
+ def __init__(self, byte_size: int = 4) -> None: + self.buffer = shared_memory.SharedMemory(create=True, size=byte_size) + self._arr = np.ndarray(1, dtype=np.float32, buffer=self.buffer.buf) + self._arr *= 0 # initialise to 0 + self.meta_data: Dict[str, Any] = { + "handle": self.buffer.name, + "shape": self._arr.shape, + "dtype": str(self._arr.dtype), + } + + # ------------------------------------------------------------------ + # Helpers consumed by the *receiver* side + # ------------------------------------------------------------------ + + @staticmethod + def open( + meta_data: Dict[str, Any], + ) -> Tuple[shared_memory.SharedMemory, np.ndarray]: + """Open an existing ShmSyncBuffer from the metadata dict.""" + shm = shared_memory.SharedMemory(name=meta_data["handle"]) + arr = np.ndarray(meta_data["shape"], dtype=meta_data["dtype"], buffer=shm.buf) + return shm, arr + + def __del__(self) -> None: + try: + self.buffer.close() + self.buffer.unlink() + except Exception: + pass - Args: - size_gb: Total size of workspace in GB - device: CUDA device ID - """ - self.device = device - self.total_size = int(size_gb * 1024 * 1024 * 1024) # Convert GB to bytes - # Allocate workspace on GPU - with torch.cuda.device(device): - self.workspace = torch.empty( - self.total_size // 4, # Divide by 4 because we use float32 - dtype=torch.float32, - device=f"cuda:{device}", - ) +# Lock file used to serialise writes to sync flags across processes +_SHM_LOCK_FILE = "/tmp/pymllm_shm_wr_lock.lock" - # Initialize chunk management - self.chunks: List[MemoryChunk] = [ - MemoryChunk(offset=0, size=self.total_size, in_use=False) - ] - # Container for reusable sync buffers - self.sync_buffer_pool: List[str] = [] +def _increment_sync_flag(meta_data: Dict[str, Any]) -> None: + """Increment the sync flag by 1 under a process-level file lock.""" + shm, arr = ShmSyncBuffer.open(meta_data) + try: + open(_SHM_LOCK_FILE, "a").close() # ensure file exists + with open(_SHM_LOCK_FILE, "w+") as f: + 
fcntl.flock(f, fcntl.LOCK_EX) + arr += 1.0 + fcntl.flock(f, fcntl.LOCK_UN) + finally: + shm.close() - logger.info( - f"WorkspaceBuffer initialized: {size_gb}GB on cuda:{device}, " - f"ptr={self.workspace.data_ptr():#x}" - ) - def allocate(self, size_bytes: int) -> Optional[Tuple[int, str]]: - """Allocate a chunk from the workspace. +# --------------------------------------------------------------------------- +# MmItemMemoryChunk +# --------------------------------------------------------------------------- - Args: - size_bytes: Required size in bytes - Returns: - Tuple of (offset, sync_shm_name) if successful, None if no space - """ - # Find a free chunk that's large enough - for i, chunk in enumerate(self.chunks): - if not chunk.in_use and chunk.size >= size_bytes: - # Mark chunk as in use - chunk.in_use = True - - # Get or create sync buffer - if self.sync_buffer_pool: - sync_shm_name = self.sync_buffer_pool.pop() - # Reset sync flag to 0 (not ready) - self._reset_sync_buffer(sync_shm_name) - else: - sync_shm_name = self._create_sync_buffer() - - chunk.sync_shm_name = sync_shm_name - - # If chunk is larger than needed, split it - if chunk.size > size_bytes: - # Create a new free chunk for the remaining space - new_chunk = MemoryChunk( - offset=chunk.offset + size_bytes, - size=chunk.size - size_bytes, - in_use=False, - ) - chunk.size = size_bytes - self.chunks.insert(i + 1, new_chunk) +class MmItemMemoryChunk: + """A contiguous slice of the ``MmItemMemoryPool`` workspace tensor.""" - logger.debug( - f"Allocated chunk: offset={chunk.offset}, size={size_bytes}, " - f"sync_shm={sync_shm_name}" - ) - return chunk.offset, sync_shm_name + def __init__(self, area: Tuple[int, int], sync_flag: ShmSyncBuffer) -> None: + self.area = area + self.sync_flag = sync_flag - logger.warning(f"WorkspaceBuffer: No space for {size_bytes} bytes") - return None + @property + def mem_size(self) -> int: + return self.area[1] - self.area[0] - def release(self, offset: int) -> None: - 
"""Release a chunk back to the pool. + @property + def start(self) -> int: + return self.area[0] - Args: - offset: Offset of the chunk to release - """ - for i, chunk in enumerate(self.chunks): - if chunk.offset == offset and chunk.in_use: - chunk.in_use = False + @property + def end(self) -> int: + return self.area[1] - # Return sync buffer to pool - if chunk.sync_shm_name: - self.sync_buffer_pool.append(chunk.sync_shm_name) - chunk.sync_shm_name = None + def try_to_recycle(self, num_consumers: int = 1) -> bool: + """Return True if all consumers have finished and the chunk can be reused.""" + val = float(self.sync_flag._arr.item()) + logger.debug( + "[try_to_recycle] area=%s flag=%.0f consumers=%d", + self.area, + val, + num_consumers, + ) + if val >= float(num_consumers): + self.sync_flag._arr *= 0.0 # reset for next use + return True + return False - # Try to merge with adjacent free chunks - self._merge_chunks() - logger.debug(f"Released chunk: offset={offset}") - return +# --------------------------------------------------------------------------- +# MmItemMemoryPool – pre-allocated GPU workspace to avoid IPC memory leaks +# --------------------------------------------------------------------------- - logger.warning(f"Attempted to release unknown chunk at offset {offset}") - def _merge_chunks(self) -> None: - """Merge adjacent free chunks to reduce fragmentation.""" - i = 0 - while i < len(self.chunks) - 1: - current = self.chunks[i] - next_chunk = self.chunks[i + 1] +class MmItemMemoryPool: + """Pre-allocated GPU memory pool for CUDA IPC tensor transport. - if not current.in_use and not next_chunk.in_use: - # Merge chunks - current.size += next_chunk.size + Chunks are allocated from a contiguous ``torch.int8`` tensor on GPU. + A background thread periodically recycles chunks whose sync flags show + that all consumers have finished reading. 
- # Keep first chunk's sync buffer, return second to pool - if next_chunk.sync_shm_name: - self.sync_buffer_pool.append(next_chunk.sync_shm_name) + Args: + memory_size: Pool size in **bytes**. + recycle_interval: How often (seconds) the recycler thread runs. + num_consumers: Number of consumer processes (tp_size). Each consumer + must increment the sync flag once before a chunk is recycled. + device: CUDA device index. + """ - self.chunks.pop(i + 1) - else: - i += 1 - - def _create_sync_buffer(self) -> str: - """Create a new shared memory sync buffer (8 bytes, initialized to 0).""" - shm_name = f"pymllm_sync_{uuid.uuid4().hex[:12]}" - shm = SharedMemory(name=shm_name, create=True, size=8) - # Initialize to 0 (not ready) - shm.buf[:8] = struct.pack("Q", 0) - shm.close() - logger.debug(f"Created sync buffer: {shm_name}") - return shm_name + def __init__( + self, + memory_size: int, + recycle_interval: float = 0.1, + num_consumers: int = 1, + device: int = 0, + ) -> None: + self.num_consumers = num_consumers + self._recycle_interval = recycle_interval + self._lock = threading.Lock() + self._stop = False - def _reset_sync_buffer(self, shm_name: str) -> None: - """Reset sync buffer to 0 (not ready).""" - try: - shm = SharedMemory(name=shm_name, create=False) - shm.buf[:8] = struct.pack("Q", 0) - shm.close() - except Exception as e: - logger.warning(f"Failed to reset sync buffer {shm_name}: {e}") - - def copy_tensor_to_workspace(self, tensor: torch.Tensor, offset: int) -> None: - """Copy tensor data to workspace at given offset. 
- - Args: - tensor: Source tensor (must be on same CUDA device) - offset: Byte offset in workspace - """ - if not tensor.is_cuda or tensor.device.index != self.device: - raise ValueError(f"Tensor must be on cuda:{self.device}") + with torch.cuda.device(device): + self.memory_pool: torch.Tensor = torch.empty( + memory_size, dtype=torch.int8, device=f"cuda:{device}" + ).contiguous() + + init_chunk = MmItemMemoryChunk((0, memory_size), self._new_sync_buffer()) + self.available_chunks: List[MmItemMemoryChunk] = [init_chunk] + self.occupied_chunks: List[MmItemMemoryChunk] = [] + # Pool of reusable ShmSyncBuffer objects (returned from recycled chunks) + self._sync_pool: List[ShmSyncBuffer] = [] + + self._recycler = threading.Thread( + target=self._recycle_loop, + name="MmItemMemoryPoolRecycler", + daemon=True, + ) + self._recycler.start() - size_bytes = tensor.numel() * tensor.element_size() + logger.info( + "MmItemMemoryPool: %d MB on cuda:%d, recycle_interval=%.2fs", + memory_size // (1024 * 1024), + device, + recycle_interval, + ) - # Get view of workspace at offset - offset_elements = offset // 4 # Workspace is float32 - num_elements = (size_bytes + 3) // 4 # Round up + # ------------------------------------------------------------------ + # Sync buffer management + # ------------------------------------------------------------------ + + def _new_sync_buffer(self) -> ShmSyncBuffer: + if self._sync_pool: + return self._sync_pool.pop() + return ShmSyncBuffer() + + def _return_sync_buffer(self, buf: ShmSyncBuffer) -> None: + buf._arr *= 0.0 # reset counter + self._sync_pool.append(buf) + + # ------------------------------------------------------------------ + # Allocation + # ------------------------------------------------------------------ + + def _get_available_chunk(self, src: torch.Tensor) -> Optional[MmItemMemoryChunk]: + """Best-fit allocation: find the smallest available chunk >= src size.""" + needed = src.numel() * src.element_size() + best: 
Optional[MmItemMemoryChunk] = None + for chunk in self.available_chunks: + if chunk.mem_size >= needed: + if best is None or chunk.mem_size < best.mem_size: + best = chunk + if best is None: + return None - workspace_view = self.workspace[ - offset_elements : offset_elements + num_elements - ] + # Split the selected chunk + occupied_area = (best.start, best.start + needed) + occupied = MmItemMemoryChunk(occupied_area, best.sync_flag) + self.occupied_chunks.append(occupied) + self.available_chunks.remove(best) - # Copy tensor data (flatten and cast to float32 view) - tensor_flat = tensor.flatten().view(torch.uint8) - workspace_flat = workspace_view.view(torch.uint8)[: tensor_flat.numel()] - workspace_flat.copy_(tensor_flat) + remainder = (occupied.end, best.end) + if remainder[0] < remainder[1]: + split = MmItemMemoryChunk(remainder, self._new_sync_buffer()) + self.available_chunks.append(split) - logger.debug(f"Copied tensor {tensor.shape} to workspace offset {offset}") + return occupied - def get_ipc_handle(self) -> bytes: - """Get CUDA IPC handle for the workspace buffer. + def get_slice_with_flag( + self, src: torch.Tensor + ) -> Tuple[Optional[Dict[str, Any]], Optional[torch.Tensor]]: + """Allocate a pool slice for *src* and return ``(sync_flag_meta, slice_tensor)``. - Returns: - CUDA IPC handle as bytes + Thread-safe. Returns ``(None, None)`` if the pool is full. 
""" - # Get IPC handle using torch.cuda API - # Note: This requires CUDA-capable device with IPC support - handle = cuda.cudart().cudaIpcGetMemHandle(self.workspace.data_ptr()) - return bytes(handle) - - def cleanup(self) -> None: - """Cleanup all sync buffers.""" - all_shm_names = set() - for chunk in self.chunks: - if chunk.sync_shm_name: - all_shm_names.add(chunk.sync_shm_name) - all_shm_names.update(self.sync_buffer_pool) - - for shm_name in all_shm_names: + with self._lock: + chunk = self._get_available_chunk(src) + if chunk is None: + logger.warning( + "MmItemMemoryPool full (%d occupied, %d available); " + "falling back to CPU transport", + len(self.occupied_chunks), + len(self.available_chunks), + ) + return None, None + pool_slice = self.memory_pool[chunk.start : chunk.end] + return chunk.sync_flag.meta_data, pool_slice + + # ------------------------------------------------------------------ + # Recycling + # ------------------------------------------------------------------ + + def _recycle_loop(self) -> None: + while not self._stop: try: - shm = SharedMemory(name=shm_name, create=False) - shm.close() - shm.unlink() - except FileNotFoundError: - pass - except Exception as e: - logger.warning(f"Failed to cleanup sync buffer {shm_name}: {e}") + with self._lock: + self._recycle_chunks() + self._merge_chunks() + except Exception as exc: + logger.warning( + "MmItemMemoryPool recycler error: %s", exc, exc_info=True + ) + time.sleep(self._recycle_interval) + + def _recycle_chunks(self) -> None: + new_occupied: List[MmItemMemoryChunk] = [] + for chunk in self.occupied_chunks: + if chunk.try_to_recycle(self.num_consumers): + self._return_sync_buffer(chunk.sync_flag) + chunk.sync_flag = self._new_sync_buffer() + self.available_chunks.append(chunk) + else: + new_occupied.append(chunk) + self.occupied_chunks = new_occupied + + def _merge_chunks(self) -> None: + """Coalesce adjacent free chunks to reduce fragmentation.""" + merged: List[MmItemMemoryChunk] = [] + for 
chunk in sorted(self.available_chunks, key=lambda c: c.start): + if merged and merged[-1].end == chunk.start: + prev = merged.pop() + self._return_sync_buffer(chunk.sync_flag) + merged.append( + MmItemMemoryChunk((prev.start, chunk.end), prev.sync_flag) + ) + else: + merged.append(chunk) + self.available_chunks = merged + + def shutdown(self) -> None: + self._stop = True + if self._recycler.is_alive(): + self._recycler.join(timeout=2.0) + - logger.info("WorkspaceBuffer cleaned up") +# --------------------------------------------------------------------------- +# CudaIpcTensorTransportProxy – pool-based CUDA IPC proxy object +# --------------------------------------------------------------------------- -@dataclass -class TensorMetadata: - """Metadata for reconstructing a tensor from CUDA IPC handle.""" +class CudaIpcTensorTransportProxy: + """Proxy that carries a CUDA IPC handle for a pool-slice tensor. - shape: Tuple[int, ...] - dtype: torch.dtype - offset: int # Byte offset in workspace - size_bytes: int - sync_shm_name: str # Shared memory name for sync flag + The *sender* process: + 1. Copies the source tensor into a ``MmItemMemoryPool`` slice (int8 view). + 2. Wraps the slice in this proxy, which captures the CUDA IPC handle via + ``storage._share_cuda_()``. + 3. Sends the proxy through ``multiprocessing.Queue`` (pickle). + The *receiver* process: + 1. Calls :meth:`reconstruct_on_device` to map the IPC memory and copy it + into a fresh local tensor. + 2. The copy increments the sync flag, allowing the sender's recycler to + reclaim the pool slice. -class CudaIPCTransport: - """Transport for sharing CUDA tensors via IPC handles.""" + Fallback: if ``_share_cuda_()`` fails (e.g. TP ranks), ``tensor_data`` holds + the raw tensor (which will be pickled the normal way, incurring serialization cost). + """ def __init__( self, - workspace_size_gb: float = 4.0, - device: int = 0, - ): - """Initialize CUDA IPC transport. 
+ data: torch.Tensor, + info_data: torch.Tensor, + sync_buffer_meta: Dict[str, Any], + ) -> None: + if not isinstance(data, torch.Tensor) or not isinstance( + info_data, torch.Tensor + ): + raise TypeError( + f"data and info_data must be torch.Tensors, got {type(data)}, {type(info_data)}" + ) - Args: - workspace_size_gb: Size of workspace buffer in GB - device: CUDA device ID - """ - self.device = device - self.workspace = WorkspaceBuffer(workspace_size_gb, device) - self.ipc_handle = self.workspace.get_ipc_handle() - self.queue: Queue = Queue() + self.sync_data_meta = sync_buffer_meta + self._state = self._build_state(data, info_data) + self._reconstructed: Optional[torch.Tensor] = None + self._shm: Optional[shared_memory.SharedMemory] = None - def send_tensor(self, rid: str, tensor: torch.Tensor) -> bool: - """Send a tensor via CUDA IPC. + def _build_state( + self, data: torch.Tensor, info_data: torch.Tensor + ) -> Dict[str, Any]: + try: + storage = data.untyped_storage() + handle = storage._share_cuda_() + return { + "ipc_handle": { + "handle": handle, + "shape": data.shape, + "dtype": data.dtype, + "stride": data.stride(), + "device_index": data.device.index, + "storage_offset": data.storage_offset(), + "target_shape": info_data.shape, + "target_dtype": info_data.dtype, + }, + "tensor_data": None, + } + except Exception as exc: + logger.warning( + "CudaIpcTensorTransportProxy: _share_cuda_() failed (%s); " + "falling back to direct tensor.", + exc, + ) + return {"ipc_handle": None, "tensor_data": data} - Args: - rid: Request ID - tensor: Tensor to send (must be on CUDA) + def reconstruct_on_device(self, device_index: Optional[int] = None) -> torch.Tensor: + """Map IPC memory and copy into a new local tensor. - Returns: - True if sent via CUDA IPC, False if fallback needed + This **must** be called from the *receiver* process. After the copy + the sync flag is incremented so the sender can recycle the pool chunk. 
""" - if not tensor.is_cuda: - logger.debug(f"Tensor for {rid} not on CUDA, skipping IPC") - return False - - size_bytes = tensor.numel() * tensor.element_size() - - # Try to allocate from workspace - result = self.workspace.allocate(size_bytes) - if result is None: - logger.warning( - f"WorkspaceBuffer full, falling back to shared queue for {rid}" + if self._reconstructed is not None: + return self._reconstructed + + state = self._state + if state["ipc_handle"] is not None: + h = state["ipc_handle"] + source_device = torch.device(f"cuda:{h['device_index']}") + target_device = ( + source_device + if device_index is None + else torch.device(f"cuda:{device_index}") ) - return False + with torch.cuda.device(source_device): + storage = torch.UntypedStorage._new_shared_cuda(*h["handle"]) + slice_tensor = torch.empty( + 0, dtype=h["dtype"], device=source_device + ).set_( + storage, + storage_offset=h["storage_offset"], + size=h["shape"], + stride=h["stride"], + ) - offset, sync_shm_name = result + result = torch.empty( + h["target_shape"], dtype=h["target_dtype"], device=target_device + ).contiguous() + result.view(torch.int8).view(-1).copy_(slice_tensor) - # Copy tensor to workspace - self.workspace.copy_tensor_to_workspace(tensor, offset) + # Signal sender that the chunk can be recycled + _increment_sync_flag(self.sync_data_meta) + elif state["tensor_data"] is not None: + result = state["tensor_data"] + if device_index is not None: + result = result.to(f"cuda:{device_index}", non_blocking=True) + else: + raise RuntimeError("CudaIpcTensorTransportProxy: invalid state") - # Create metadata - metadata = TensorMetadata( - shape=tuple(tensor.shape), - dtype=tensor.dtype, - offset=offset, - size_bytes=size_bytes, - sync_shm_name=sync_shm_name, - ) + self._reconstructed = result + return result - # Send metadata through queue - self.queue.put((rid, metadata, self.ipc_handle)) - logger.debug(f"Sent tensor {tensor.shape} for {rid} via CUDA IPC") - return True +# 
--------------------------------------------------------------------------- +# TransportProxyTensor – simple CUDA IPC via torch.Tensor subclass + pickle +# --------------------------------------------------------------------------- - def receive_tensor( - self, timeout: float = 0.0001 - ) -> Optional[Tuple[str, torch.Tensor]]: - """Receive a tensor via CUDA IPC. - Args: - timeout: Timeout for queue.get +class TransportProxyTensor(torch.Tensor): + """A ``torch.Tensor`` subclass whose pickle uses CUDA IPC handles. - Returns: - Tuple of (rid, tensor) or None if queue empty - """ - try: - rid, metadata, ipc_handle = self.queue.get(timeout=timeout) - except Exception: - return None + When ``transport_mode == "cuda_ipc"`` and the tensor is on CUDA, + ``__getstate__`` exports the tensor via ``storage._share_cuda_()`` instead + of serialising the raw data. ``__setstate__`` reconstructs it in the + receiving process via ``UntypedStorage._new_shared_cuda``. - # Open IPC memory handle - # Note: This creates a tensor view into the remote process's workspace - with torch.cuda.device(self.device): - # Reconstruct tensor from IPC handle - # This is a view into remote memory, we need to copy it locally + Caveat: The underlying GPU allocation is never freed until the *sender* + process exits (PyTorch limitation). Prefer ``"cuda_ipc_pool"`` mode for + long-running services to avoid GPU memory leaks. - # For now, use a simpler approach: signal to copy later - # In production, you'd use cuda.cudart().cudaIpcOpenMemHandle + When the tensor is on CPU or ``transport_mode == "default"``, the tensor + is serialised normally (pickle of raw data). 
+ """ + @staticmethod + def __new__( + cls, + data: torch.Tensor, + transport_mode: TensorTransportMode = "default", + ) -> "TransportProxyTensor": + if not isinstance(data, torch.Tensor): + raise TypeError(f"data must be a torch.Tensor, got {type(data)}") + instance = data.as_subclass(cls) + instance._transport_mode = transport_mode + return instance + + def __getstate__(self) -> Dict[str, Any]: + state: Dict[str, Any] = { + "transport_mode": self._transport_mode, + "tensor_data": None, + "ipc_extra": None, + } + if self._transport_mode == "cuda_ipc" and self.is_cuda: + try: + storage = self.untyped_storage() + handle = storage._share_cuda_() + state["ipc_extra"] = { + "handle": handle, + "shape": self.shape, + "dtype": self.dtype, + "stride": self.stride(), + "device_index": self.device.index, + "storage_offset": self.storage_offset(), + } + except Exception as exc: + logger.warning( + "TransportProxyTensor: _share_cuda_() failed (%s); falling back.", + exc, + ) + state["transport_mode"] = "default" + state["tensor_data"] = self.as_subclass(torch.Tensor) + else: + state["transport_mode"] = "default" + state["tensor_data"] = self.as_subclass(torch.Tensor) + return state + + def __setstate__(self, state: Dict[str, Any]) -> None: + self._transport_mode = state["transport_mode"] + if state["transport_mode"] == "cuda_ipc" and state["ipc_extra"] is not None: + h = state["ipc_extra"] + target = torch.device(f"cuda:{h['device_index']}") + try: + with torch.cuda.device(target): + storage = torch.UntypedStorage._new_shared_cuda(*h["handle"]) + reconstructed = torch.empty( + 0, dtype=h["dtype"], device=target + ).set_( + storage, + storage_offset=h["storage_offset"], + size=h["shape"], + stride=h["stride"], + ) + self.set_(reconstructed) + except Exception as exc: + logger.error("TransportProxyTensor: failed to open IPC handle: %s", exc) + raise + elif state["tensor_data"] is not None: + self.set_(state["tensor_data"]) + else: + raise RuntimeError("TransportProxyTensor: 
invalid state – no tensor data") + + @property + def transport_mode(self) -> TensorTransportMode: + return getattr(self, "_transport_mode", "default") + + +# --------------------------------------------------------------------------- +# Helpers: wrap / unwrap mm_inputs dicts +# --------------------------------------------------------------------------- + + +def wrap_mm_inputs_for_ipc( + mm_inputs: Optional[Dict[str, Any]], + transport_mode: TensorTransportMode, + pool: Optional["MmItemMemoryPool"] = None, +) -> Optional[Dict[str, Any]]: + """Recursively wrap CUDA tensors in *mm_inputs* for IPC transport. + + Args: + mm_inputs: Nested dict/list of tensors and other data. + transport_mode: One of ``"default"``, ``"cuda_ipc"``, ``"cuda_ipc_pool"``. + pool: Required when ``transport_mode == "cuda_ipc_pool"``. + + Returns: + A new data structure with CUDA tensors replaced by IPC proxies. + CPU tensors are left unchanged (they will be shared via ``share_memory_()`` + or normal pickling downstream). 
+ """ + if mm_inputs is None: + return None + return _wrap_recursive(mm_inputs, transport_mode, pool) + + +def _wrap_recursive( + data: Any, + transport_mode: TensorTransportMode, + pool: Optional["MmItemMemoryPool"], +) -> Any: + if isinstance(data, torch.Tensor) and data.is_cuda: + return _wrap_cuda_tensor(data, transport_mode, pool) + elif isinstance(data, dict): + return {k: _wrap_recursive(v, transport_mode, pool) for k, v in data.items()} + elif isinstance(data, (list, tuple)): + wrapped = [_wrap_recursive(item, transport_mode, pool) for item in data] + return type(data)(wrapped) + else: + return data + + +def _wrap_cuda_tensor( + tensor: torch.Tensor, + transport_mode: TensorTransportMode, + pool: Optional["MmItemMemoryPool"], +) -> Any: + if transport_mode == "cuda_ipc": + return TransportProxyTensor(tensor, transport_mode="cuda_ipc") + + if transport_mode == "cuda_ipc_pool": + if pool is None: + raise ValueError("pool must be provided for transport_mode='cuda_ipc_pool'") + sync_meta, pool_slice = pool.get_slice_with_flag(tensor) + if pool_slice is not None: + # Copy tensor bytes into the pool slice + pool_slice.copy_(tensor.view(torch.int8).view(-1), non_blocking=True) + return CudaIpcTensorTransportProxy( + data=pool_slice, + info_data=tensor, + sync_buffer_meta=sync_meta, + ) + else: + # Pool full – fall back to simple IPC (with potential memory leak) logger.warning( - "CUDA IPC receive not fully implemented - requires cudaIpcOpenMemHandle" + "Pool full; falling back to simple CUDA IPC (potential memory leak)" ) - # TODO: Implement actual IPC handle opening + return TransportProxyTensor(tensor, transport_mode="cuda_ipc") - # Create local tensor and signal copy completion - tensor = torch.empty( - metadata.shape, dtype=metadata.dtype, device=f"cuda:{self.device}" - ) + # "default" – move to CPU shared memory (handled by share_memory_() downstream) + return tensor - # Mark chunk as ready for reuse by setting sync flag - 
self._mark_chunk_reusable(metadata.sync_shm_name) - return rid, tensor +def unwrap_mm_inputs_from_ipc( + mm_inputs: Optional[Dict[str, Any]], + device_index: Optional[int] = None, +) -> Optional[Dict[str, Any]]: + """Recursively reconstruct tensors from IPC proxy objects. - def _mark_chunk_reusable(self, sync_shm_name: str) -> None: - """Mark a chunk as reusable by setting sync flag to 1.""" - try: - shm = SharedMemory(name=sync_shm_name, create=False) - shm.buf[:8] = struct.pack("Q", 1) # Set to 1 (ready for reuse) - shm.close() - logger.debug(f"Marked chunk reusable: {sync_shm_name}") - except Exception as e: - logger.error(f"Failed to mark chunk reusable {sync_shm_name}: {e}") - - def cleanup(self) -> None: - """Cleanup resources.""" - self.workspace.cleanup() - self.queue.close() + Call this in the *receiver* process after getting data from the queue. + + Args: + mm_inputs: Data structure possibly containing IPC proxy objects. + device_index: If not None, move reconstructed tensors to this device. + """ + if mm_inputs is None: + return None + return _unwrap_recursive(mm_inputs, device_index) + + +def _unwrap_recursive(data: Any, device_index: Optional[int]) -> Any: + if isinstance(data, CudaIpcTensorTransportProxy): + return data.reconstruct_on_device(device_index) + elif isinstance(data, TransportProxyTensor): + # Already reconstructed during unpickling; just return as plain tensor + return data.as_subclass(torch.Tensor) + elif isinstance(data, dict): + return {k: _unwrap_recursive(v, device_index) for k, v in data.items()} + elif isinstance(data, (list, tuple)): + result = [_unwrap_recursive(item, device_index) for item in data] + return type(data)(result) + else: + return data diff --git a/pymllm/orchestrator/scheduler_process.py b/pymllm/orchestrator/scheduler_process.py index 64ea55b0..8f2d9a95 100644 --- a/pymllm/orchestrator/scheduler_process.py +++ b/pymllm/orchestrator/scheduler_process.py @@ -9,6 +9,10 @@ 1. 
Legacy ZMQ path: Receive TokenizedGenerateReqInput via ZMQ recv_pyobj 2. Shared queue fast path: Read rid from shared queue and metadata from shared memory +When the shared queue fast path is active the scheduler also handles CUDA IPC +tensor reconstruction via +:func:`~pymllm.orchestrator.cuda_ipc_transport.unwrap_mm_inputs_from_ipc`. + The main ``event_loop`` scheduler flow:: while True: @@ -31,6 +35,10 @@ import zmq from pymllm.engine.io_struct import TokenizedGenerateReqInput +from pymllm.orchestrator.cuda_ipc_transport import ( + TensorTransportMode, + unwrap_mm_inputs_from_ipc, +) from pymllm.orchestrator.ipc_utils import create_zmq_socket from pymllm.orchestrator.shared_memory_queue import SharedMemoryManager, TensorQueue @@ -48,6 +56,7 @@ def __init__( send_to_detokenizer_addr: str, shared_queue: Optional[TensorQueue] = None, enable_shared_queue: bool = False, + tensor_transport_mode: TensorTransportMode = "default", ): # ZMQ addresses self._recv_from_tokenizer_addr = recv_from_tokenizer_addr @@ -58,6 +67,7 @@ def __init__( # Shared queue configuration self._shared_queue = shared_queue self._enable_shared_queue = enable_shared_queue + self._tensor_transport_mode = tensor_transport_mode # ZMQ runtime objects (initialised in init_sockets) self._zmq_ctx: Optional[zmq.Context] = None @@ -111,8 +121,9 @@ def init_sockets(self) -> None: def event_loop(self) -> None: """Infinite scheduling loop.""" logger.info( - "SchedulerProcess event loop started (shared_queue=%s)", + "SchedulerProcess event loop started (shared_queue=%s, transport=%s)", self._enable_shared_queue, + self._tensor_transport_mode, ) while True: self.recv_requests() @@ -163,10 +174,21 @@ def _recv_from_zmq(self) -> None: self._waiting_queue.append(msg) def _recv_from_shared_queue(self) -> None: - """Receive requests via shared memory + shared queue fast path.""" + """Receive requests via shared memory + shared queue fast path. 
+ + After reading a ``(rid, shm_name, mm_inputs)`` tuple from the queue: + 1. The tokenized metadata is read from the POSIX shared memory segment. + 2. If CUDA IPC is enabled, ``mm_inputs`` may contain + :class:`~pymllm.orchestrator.cuda_ipc_transport.CudaIpcTensorTransportProxy` + or :class:`~pymllm.orchestrator.cuda_ipc_transport.TransportProxyTensor` + objects that are reconstructed by calling + :func:`~pymllm.orchestrator.cuda_ipc_transport.unwrap_mm_inputs_from_ipc`. + This step also increments sync flags so the sender can recycle pool chunks. + 3. A full ``TokenizedGenerateReqInput`` is assembled and appended to + ``_waiting_queue``. + """ while True: try: - # Non-blocking get from shared queue rid, shm_name, mm_inputs = self._shared_queue.get(timeout=0.0001) # Read metadata from shared memory (and unlink immediately) @@ -174,12 +196,16 @@ def _recv_from_shared_queue(self) -> None: shm_name, unlink=True ) - # Reconstruct the full TokenizedGenerateReqInput with mm_inputs + # Reconstruct GPU tensors from CUDA IPC handles (if any) + if self._tensor_transport_mode in ("cuda_ipc", "cuda_ipc_pool"): + mm_inputs = unwrap_mm_inputs_from_ipc(mm_inputs) + + # Reassemble the full request full_request = TokenizedGenerateReqInput( rid=metadata.rid, input_text=metadata.input_text, input_ids=metadata.input_ids, - mm_inputs=mm_inputs, # Restored from shared queue + mm_inputs=mm_inputs, sampling_params=metadata.sampling_params, stream=metadata.stream, return_logprob=metadata.return_logprob, @@ -190,18 +216,18 @@ def _recv_from_shared_queue(self) -> None: ) self._waiting_queue.append(full_request) - logger.debug(f"Received request {rid} from shared queue") + logger.debug("Received request %s from shared queue", rid) except stdlib_queue.Empty: - # No more requests available break - except Exception as e: - logger.error(f"Error receiving from shared queue: {e}", exc_info=True) - # Try to cleanup shared memory if possible + except Exception as exc: + logger.error( + "Error 
receiving from shared queue: %s", exc, exc_info=True + ) try: if "shm_name" in locals(): SharedMemoryManager.cleanup(shm_name) - except: + except Exception: pass break @@ -310,6 +336,7 @@ def run_scheduler_process( pipe_writer: Connection, shared_queue: Optional[TensorQueue] = None, enable_shared_queue: bool = False, + tensor_transport_mode: TensorTransportMode = "default", ) -> None: """Entry point for ``torch.multiprocessing.Process(target=...)``.""" proc = SchedulerProcess( @@ -319,6 +346,7 @@ def run_scheduler_process( send_to_detokenizer_addr, shared_queue=shared_queue, enable_shared_queue=enable_shared_queue, + tensor_transport_mode=tensor_transport_mode, ) proc.init_sockets() diff --git a/pymllm/orchestrator/shared_memory_queue.py b/pymllm/orchestrator/shared_memory_queue.py index 3d26ebf1..2f006bdc 100644 --- a/pymllm/orchestrator/shared_memory_queue.py +++ b/pymllm/orchestrator/shared_memory_queue.py @@ -1,32 +1,75 @@ """ Shared memory and queue utilities for fast IPC between tokenizer and scheduler. -This module implements shared-queue fast path to avoid expensive -ZMQ serialization of large multimodal tensors. +This module implements the shared-queue fast path to avoid expensive ZMQ +serialization of large multimodal tensors. -Design: - - Metadata lane: Small tokenized objects stored in shared memory keyed by rid - - Tensor lane: Large tensors made shareable via share_memory_() and passed by handle +## Design + +- **Metadata lane**: Small tokenized objects are written to a POSIX shared memory + segment keyed by the request ID (``rid``). The scheduler reads and immediately + unlinks the segment. + +- **Tensor lane**: Large tensors can be transported in one of three modes, + controlled by ``TensorTransportMode`` (passed at queue construction time): + + * ``"default"`` – CPU tensors only. GPU tensors are moved to POSIX shared + memory via ``tensor.share_memory_()`` (or left on CPU if already there). 
+ This is the original behaviour and requires no CUDA support. + + * ``"cuda_ipc"`` – GPU tensors stay on GPU and are wrapped in + :class:`~pymllm.orchestrator.cuda_ipc_transport.TransportProxyTensor`. On the + receiver side the proxy's ``__setstate__`` automatically reconstructs the + tensor from the CUDA IPC handle during unpickling. CPU tensors are handled as + in ``"default"`` mode. **Caveat**: GPU memory is not freed until the sender + process exits (PyTorch limitation). Prefer ``"cuda_ipc_pool"`` for services. + + * ``"cuda_ipc_pool"`` – GPU tensors are copied into a pre-allocated + :class:`~pymllm.orchestrator.cuda_ipc_transport.MmItemMemoryPool` workspace and + wrapped in :class:`~pymllm.orchestrator.cuda_ipc_transport.CudaIpcTensorTransportProxy`. + After the receiver copies the data it increments a sync flag and the sender's + recycler thread returns the chunk to the pool. This avoids GPU memory leaks. + CPU tensors are handled as in ``"default"`` mode. + +## Key relationship with CUDA IPC + +``"default"`` and ``"cuda_ipc*"`` modes are **mutually exclusive for GPU tensors**: + +- In ``"default"`` mode, GPU tensors that need to cross process boundaries must + first be moved to CPU (``share_memory_()``). This incurs a GPU→CPU copy. +- In ``"cuda_ipc*"`` modes, GPU tensors are shared as-is via CUDA IPC handles; + no copy to CPU is needed. + +CPU tensors are always handled via ``share_memory_()`` regardless of the mode. 
""" +from __future__ import annotations + import logging import pickle import uuid from multiprocessing import Queue from multiprocessing.shared_memory import SharedMemory -from typing import Any, Dict, Optional +from typing import Any, Dict, Literal, Optional import torch +from pymllm.orchestrator.cuda_ipc_transport import ( + MmItemMemoryPool, + TensorTransportMode, + unwrap_mm_inputs_from_ipc, + wrap_mm_inputs_for_ipc, +) + logger = logging.getLogger(__name__) class SharedMemoryManager: """Manages shared memory segments for passing metadata between processes. - Each tokenized request's metadata is written to a unique shared memory segment - keyed by its request ID (rid). The scheduler reads and immediately unlinks the - segment to prevent memory leaks. + Each tokenized request's metadata is written to a unique shared memory + segment keyed by its request ID (rid). The scheduler reads and immediately + unlinks the segment to prevent memory leaks. """ @staticmethod @@ -40,23 +83,17 @@ def write_metadata(rid: str, metadata: Any) -> str: Returns: str: The shared memory segment name """ - # Serialize the metadata data = pickle.dumps(metadata) size = len(data) - - # Create unique shared memory segment name shm_name = f"pymllm_meta_{rid}_{uuid.uuid4().hex[:8]}" - try: - # Create shared memory segment shm = SharedMemory(name=shm_name, create=True, size=size) - # Write data shm.buf[:size] = data shm.close() - logger.debug(f"Wrote {size} bytes to shared memory {shm_name}") + logger.debug("Wrote %d bytes to shared memory %s", size, shm_name) return shm_name - except Exception as e: - logger.error(f"Failed to write metadata to shared memory: {e}") + except Exception as exc: + logger.error("Failed to write metadata to shared memory: %s", exc) raise @staticmethod @@ -71,25 +108,21 @@ def read_metadata(shm_name: str, unlink: bool = True) -> Any: The deserialized metadata object """ try: - # Open existing shared memory segment shm = SharedMemory(name=shm_name, create=False) - # 
Read and deserialize data data = bytes(shm.buf[:]) metadata = pickle.loads(data) shm.close() - - # Unlink to free memory immediately if unlink: try: shm.unlink() - logger.debug(f"Read and unlinked shared memory {shm_name}") + logger.debug("Read and unlinked shared memory %s", shm_name) except FileNotFoundError: - # Already unlinked, ignore pass - return metadata - except Exception as e: - logger.error(f"Failed to read metadata from shared memory {shm_name}: {e}") + except Exception as exc: + logger.error( + "Failed to read metadata from shared memory %s: %s", shm_name, exc + ) raise @staticmethod @@ -99,85 +132,137 @@ def cleanup(shm_name: str) -> None: shm = SharedMemory(name=shm_name, create=False) shm.close() shm.unlink() - logger.debug(f"Cleaned up shared memory {shm_name}") + logger.debug("Cleaned up shared memory %s", shm_name) except FileNotFoundError: - pass # Already cleaned up - except Exception as e: - logger.warning(f"Failed to cleanup shared memory {shm_name}: {e}") + pass + except Exception as exc: + logger.warning("Failed to cleanup shared memory %s: %s", shm_name, exc) class TensorQueue: - """Queue for passing large tensors between processes using shared memory. + """Queue for passing large tensors between processes. - Tensors are made shareable via .share_memory_() and passed through a - multiprocessing.Queue by handle (metadata only, not the actual data). - """ + Depending on ``transport_mode``, GPU tensors are either moved to CPU shared + memory (``"default"``) or kept on GPU and shared via CUDA IPC handles + (``"cuda_ipc"`` / ``"cuda_ipc_pool"``). - def __init__(self, maxsize: int = 0): - """Initialize the tensor queue. + Args: + maxsize: Maximum queue size (0 for unlimited). + transport_mode: Controls how GPU tensors are transported. + pool: Required when ``transport_mode == "cuda_ipc_pool"``. 
+ """ - Args: - maxsize: Maximum queue size (0 for unlimited) - """ + def __init__( + self, + maxsize: int = 0, + transport_mode: TensorTransportMode = "default", + pool: Optional[MmItemMemoryPool] = None, + ) -> None: + # pool is allowed to be None at construction time for "cuda_ipc_pool" mode + # because the pool is initialised lazily inside the sender subprocess. + # The pool reference is injected later via _pool attribute assignment. self._queue: Queue = Queue(maxsize=maxsize) + self._transport_mode = transport_mode + self._pool = pool + + # ------------------------------------------------------------------ + # Producer side + # ------------------------------------------------------------------ - def put(self, rid: str, shm_name: str, mm_inputs: Optional[Dict[str, Any]]) -> None: - """Put a request with multimodal inputs into the queue. + def put( + self, + rid: str, + shm_name: str, + mm_inputs: Optional[Dict[str, Any]], + ) -> None: + """Put a request into the queue. + + GPU tensors inside *mm_inputs* are wrapped according to + ``transport_mode`` before being placed into the underlying + ``multiprocessing.Queue``. Args: - rid: Request ID - shm_name: Shared memory segment name for metadata - mm_inputs: Multimodal inputs dict (can contain torch tensors) + rid: Request ID. + shm_name: Shared memory segment name for the tokenized metadata. + mm_inputs: Multimodal inputs dict (may contain CUDA tensors). """ - # Make tensors shareable if present if mm_inputs is not None: - mm_inputs = self._make_tensors_shareable(mm_inputs) + if self._transport_mode in ("cuda_ipc", "cuda_ipc_pool"): + if self._transport_mode == "cuda_ipc_pool" and self._pool is None: + # Pool not yet initialised (race condition or CUDA unavailable); + # fall back to simple CUDA IPC for this message. 
+ effective_mode = "cuda_ipc" + else: + effective_mode = self._transport_mode + # Wrap CUDA tensors in IPC proxies (stays on GPU, no copy to CPU) + mm_inputs = wrap_mm_inputs_for_ipc( + mm_inputs, + transport_mode=effective_mode, + pool=self._pool, + ) + # CPU tensors within mm_inputs are still shared via share_memory_() + mm_inputs = self._share_cpu_tensors(mm_inputs) + else: + # "default": move all tensors to CPU shared memory + mm_inputs = self._make_tensors_shareable(mm_inputs) self._queue.put((rid, shm_name, mm_inputs)) - logger.debug(f"Put request {rid} into tensor queue (shm={shm_name})") + logger.debug("Put request %s into tensor queue (shm=%s)", rid, shm_name) + + # ------------------------------------------------------------------ + # Consumer side + # ------------------------------------------------------------------ def get( self, timeout: Optional[float] = None ) -> tuple[str, str, Optional[Dict[str, Any]]]: """Get a request from the queue. + GPU tensors wrapped as IPC proxies are **not** automatically + reconstructed here – the caller (scheduler) must call + :func:`~pymllm.orchestrator.cuda_ipc_transport.unwrap_mm_inputs_from_ipc` + after retrieval. + Args: - timeout: Timeout in seconds (None for blocking indefinitely) + timeout: Timeout in seconds (None for blocking). Returns: - Tuple of (rid, shm_name, mm_inputs) + Tuple of ``(rid, shm_name, mm_inputs)``. 
""" rid, shm_name, mm_inputs = self._queue.get(timeout=timeout) - logger.debug(f"Got request {rid} from tensor queue (shm={shm_name})") + logger.debug("Got request %s from tensor queue (shm=%s)", rid, shm_name) return rid, shm_name, mm_inputs + # ------------------------------------------------------------------ + # Queue introspection + # ------------------------------------------------------------------ + def empty(self) -> bool: - """Check if the queue is empty.""" return self._queue.empty() def qsize(self) -> int: - """Return the approximate size of the queue.""" try: return self._queue.qsize() except NotImplementedError: - return 0 # Some platforms don't support qsize + return 0 def close(self) -> None: - """Close the queue.""" self._queue.close() + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + @staticmethod def _make_tensors_shareable(data: Any) -> Any: - """Recursively make all torch tensors in a data structure shareable. + """Recursively move all tensors (CPU and CUDA) to POSIX shared memory. - Args: - data: Nested dict/list/tensor structure - - Returns: - The same structure with tensors made shareable via share_memory_() + GPU tensors are first moved to CPU (incurring a device copy), then + placed in shared memory. This is the ``"default"`` path. """ if isinstance(data, torch.Tensor): - # Make tensor shareable across processes + if data.is_cuda: + data = data.cpu() if not data.is_shared(): data = data.share_memory_() return data @@ -188,3 +273,20 @@ def _make_tensors_shareable(data: Any) -> Any: return type(data)(result) else: return data + + @staticmethod + def _share_cpu_tensors(data: Any) -> Any: + """Recursively place CPU tensors in shared memory (GPU tensors are already + wrapped as IPC proxies and must not be touched here). 
+ """ + if isinstance(data, torch.Tensor) and not data.is_cuda: + if not data.is_shared(): + data = data.share_memory_() + return data + elif isinstance(data, dict): + return {k: TensorQueue._share_cpu_tensors(v) for k, v in data.items()} + elif isinstance(data, (list, tuple)): + result = [TensorQueue._share_cpu_tensors(item) for item in data] + return type(data)(result) + else: + return data diff --git a/pymllm/orchestrator/tokenizer_process.py b/pymllm/orchestrator/tokenizer_process.py index 43db5ba0..587a7c1e 100644 --- a/pymllm/orchestrator/tokenizer_process.py +++ b/pymllm/orchestrator/tokenizer_process.py @@ -4,9 +4,26 @@ Receives raw requests from RequestResponseProcess via ZMQ, tokenizes them, and forwards the tokenized payloads to the SchedulerProcess. -Supports two modes: - 1. Legacy ZMQ path: Send TokenizedGenerateReqInput via ZMQ send_pyobj - 2. Shared queue fast path: Write metadata to shared memory and put rid in shared queue +Supports two transport modes (controlled by ``enable_shared_queue`` and +``tensor_transport_mode`` in the tokenizer config): + +1. **Legacy ZMQ path** (``enable_shared_queue=False``): + Tokenized objects are sent directly via ``ZMQ send_pyobj`` (pickle). This + is simple but slow for large multimodal tensors. + +2. **Shared queue fast path** (``enable_shared_queue=True``): + Metadata is written to POSIX shared memory and the queue carries a + lightweight ``(rid, shm_name, mm_inputs)`` tuple. The GPU tensors inside + ``mm_inputs`` are transported differently depending on ``tensor_transport_mode``: + + * ``"default"`` – GPU tensors are moved to CPU first (GPU→CPU copy), + then placed in POSIX shared memory. + * ``"cuda_ipc"`` – GPU tensors stay on GPU; they are wrapped in a + :class:`TransportProxyTensor` whose pickle uses CUDA IPC handles. + Simple but may leak GPU memory. + * ``"cuda_ipc_pool"`` – GPU tensors are copied into a pre-allocated + :class:`MmItemMemoryPool` workspace and shared via pool-chunk IPC + handles. 
Chunks are recycled; no GPU memory is leaked. """ import logging @@ -17,6 +34,7 @@ from transformers import AutoProcessor, AutoTokenizer from pymllm.engine.io_struct import TokenizedGenerateReqInput +from pymllm.orchestrator.cuda_ipc_transport import MmItemMemoryPool, TensorTransportMode from pymllm.orchestrator.ipc_utils import create_zmq_socket from pymllm.orchestrator.shared_memory_queue import SharedMemoryManager, TensorQueue @@ -40,16 +58,22 @@ def __init__( Serialisable dict built by the parent process (``Engine``) before spawning. Required keys: - * ``tokenizer_path`` – str, path to the tokenizer directory. - * ``tokenizer_mode`` – ``"auto" | "slow" | "fast"``. - * ``trust_remote_code`` – bool. - * ``context_length`` – Optional[int], explicit cap; inferred from - ``hf_config`` when ``None``. - * ``hf_config`` – Optional HuggingFace PretrainedConfig - (pickled by multiprocessing); used only to infer ``context_length``. - * ``enable_shared_queue`` – bool, whether to use shared memory fast path. + * ``tokenizer_path`` – str, path to the tokenizer directory. + * ``tokenizer_mode`` – ``"auto" | "slow" | "fast"``. + * ``trust_remote_code`` – bool. + * ``context_length`` – Optional[int], explicit cap; inferred + from ``hf_config`` when ``None``. + * ``hf_config`` – Optional HuggingFace PretrainedConfig. + * ``enable_shared_queue`` – bool, whether to use shared memory fast path. + * ``tensor_transport_mode`` – ``"default" | "cuda_ipc" | "cuda_ipc_pool"``. + * ``cuda_ipc_pool_size_mb`` – int, pool size in MB (cuda_ipc_pool only). + * ``cuda_ipc_recycle_interval`` – float, recycler sleep interval (s). + shared_queue: - Optional TensorQueue for shared memory fast path communication. + Optional :class:`TensorQueue` for the shared memory fast path. + When *transport_mode* is ``"cuda_ipc_pool"`` this queue should have + been constructed with a ``MmItemMemoryPool``; the ``TokenizerProcess`` + initialises its own pool in that case. 
""" self._recv_from_rr_addr = recv_from_rr_addr self._send_to_scheduler_addr = send_to_scheduler_addr @@ -57,6 +81,21 @@ def __init__( self._enable_shared_queue = tokenizer_cfg.get("enable_shared_queue", False) self._shared_queue = shared_queue + # Tensor transport configuration + self._transport_mode: TensorTransportMode = tokenizer_cfg.get( + "tensor_transport_mode", "default" + ) + # Pool for cuda_ipc_pool mode – will be initialised lazily when the + # process first encounters a CUDA tensor. + self._ipc_pool: Optional[MmItemMemoryPool] = None + if self._transport_mode == "cuda_ipc_pool": + # The pool must be created inside the subprocess (after fork/spawn) + # because it allocates CUDA memory. We defer to _ensure_pool(). + pool_mb: int = int(tokenizer_cfg.get("cuda_ipc_pool_size_mb", 512)) + recycle: float = float(tokenizer_cfg.get("cuda_ipc_recycle_interval", 0.1)) + self._ipc_pool_size_mb = pool_mb + self._ipc_recycle_interval = recycle + self._zmq_ctx: Optional[zmq.Context] = None self._recv_from_rr: Optional[zmq.Socket] = None self._send_to_scheduler: Optional[zmq.Socket] = None @@ -89,8 +128,9 @@ def init_sockets(self) -> None: def event_loop(self) -> None: """Infinite loop: recv raw request -> tokenize -> send to scheduler.""" logger.info( - "TokenizerProcess event loop started (shared_queue=%s)", + "TokenizerProcess event loop started (shared_queue=%s, transport=%s)", self._enable_shared_queue, + self._transport_mode, ) while True: raw_request: Dict[str, Any] = self._recv_from_rr.recv_pyobj() @@ -108,12 +148,19 @@ def _send_via_shared_queue( ) -> None: """Send tokenized request via shared memory + shared queue fast path. - Args: - tokenized: Either TokenizedGenerateReqInput dataclass or abort dict + GPU tensors inside ``mm_inputs`` are handled according to + ``self._transport_mode``: + + * ``"default"`` – moved to CPU via ``share_memory_()`` by ``TensorQueue``. + * ``"cuda_ipc"`` – wrapped in :class:`TransportProxyTensor` (stays on GPU). 
+ * ``"cuda_ipc_pool"`` – copied into the :class:`MmItemMemoryPool` workspace and + wrapped in :class:`CudaIpcTensorTransportProxy`. + + Abort sentinel messages are forwarded via ZMQ (they are lightweight dicts). """ # Handle abort sentinel if isinstance(tokenized, dict) and tokenized.get("abort"): - # Fallback to ZMQ for abort messages + # Fallback to ZMQ for abort messages (no tensor payload) self._send_to_scheduler.send_pyobj(tokenized) return @@ -121,10 +168,14 @@ def _send_via_shared_queue( f"Expected TokenizedGenerateReqInput, got {type(tokenized)}" ) + # Lazily initialise the CUDA IPC pool (must happen inside the subprocess) + if self._transport_mode == "cuda_ipc_pool": + self._ensure_pool() + rid = tokenized.rid mm_inputs = tokenized.mm_inputs - # Create a lightweight metadata object (without mm_inputs) + # Create lightweight metadata object (mm_inputs sent separately via queue) metadata = TokenizedGenerateReqInput( rid=tokenized.rid, input_text=tokenized.input_text, @@ -143,9 +194,73 @@ def _send_via_shared_queue( shm_name = SharedMemoryManager.write_metadata(rid, metadata) # Put (rid, shm_name, mm_inputs) into shared queue + # TensorQueue.put() handles wrapping mm_inputs based on transport_mode self._shared_queue.put(rid, shm_name, mm_inputs) - logger.debug(f"Sent request {rid} via shared queue (shm={shm_name})") + logger.debug( + "Sent request %s via shared queue (shm=%s, transport=%s)", + rid, + shm_name, + self._transport_mode, + ) + + # ------------------------------------------------------------------ + # CUDA IPC pool initialisation (deferred to subprocess) + # ------------------------------------------------------------------ + + def _ensure_pool(self) -> None: + """Lazily create the MmItemMemoryPool inside the subprocess. + + This is deferred because CUDA context creation must happen after + ``torch.multiprocessing.Process`` has started (post-fork/spawn). 
+ Once the pool is created we update the shared queue's transport config + in-place so the same underlying ``multiprocessing.Queue`` object is reused + (both processes already hold a reference to it). + """ + if self._ipc_pool is not None: + return + try: + import torch + + if not torch.cuda.is_available(): + logger.warning( + "CUDA not available; falling back to transport_mode='default'" + ) + self._transport_mode = "default" + if self._shared_queue is not None: + self._shared_queue._transport_mode = "default" + return + + pool_bytes = self._ipc_pool_size_mb * 1024 * 1024 + device = torch.cuda.current_device() + self._ipc_pool = MmItemMemoryPool( + memory_size=pool_bytes, + recycle_interval=self._ipc_recycle_interval, + device=device, + ) + # Update the shared queue's config in-place. + # Both processes share the same multiprocessing.Queue object, so we + # just update the wrapper's transport metadata; the underlying queue + # pipe is unchanged. + if self._shared_queue is not None: + self._shared_queue._transport_mode = self._transport_mode + self._shared_queue._pool = self._ipc_pool + + logger.info( + "MmItemMemoryPool initialised: %d MB on cuda:%d", + self._ipc_pool_size_mb, + device, + ) + except Exception as exc: + logger.error( + "Failed to initialise MmItemMemoryPool: %s; " + "falling back to transport_mode='default'", + exc, + exc_info=True, + ) + self._transport_mode = "default" + if self._shared_queue is not None: + self._shared_queue._transport_mode = "default" # ------------------------------------------------------------------ # Tokenization and multimodal preprocessing @@ -352,6 +467,8 @@ def _collect_mm_inputs( return mm def shutdown(self) -> None: + if self._ipc_pool is not None: + self._ipc_pool.shutdown() if self._recv_from_rr is not None: self._recv_from_rr.close() if self._send_to_scheduler is not None: From 9bc959fcd90bded9eb4a2fdeb3ad8208b1c6f790 Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Mon, 2 Mar 2026 06:55:36 
+0000 Subject: [PATCH 12/13] feat(sampling): add sampling module with FlashInfer acceleration and PyTorch fallback - Introduce pymllm.layers.sampling with diverse sampling functions - Implement softmax with temperature scaling and FlashInfer support - Add category sampling from probabilities and logits with optional determinism - Support top-p (nucleus), top-k, and min-p sampling methods - Provide combined top-k + top-p sampling from logits and probabilities - Add probability renormalization for top-p and top-k thresholds - Implement top-k masking for logits to filter out lower probabilities - Include chain speculative sampling for accelerated sequence generation - Provide pure-PyTorch fallback implementations for all methods - Update pymllm.layers.__init__.py to export new sampling functions - Rename pymllm.executor.eager_runner.py to model_runner.py for clarity --- .../{eager_runner.py => model_runner.py} | 0 pymllm/layers/__init__.py | 26 + pymllm/layers/sampling.py | 767 ++++++++++++++++++ 3 files changed, 793 insertions(+) rename pymllm/executor/{eager_runner.py => model_runner.py} (100%) diff --git a/pymllm/executor/eager_runner.py b/pymllm/executor/model_runner.py similarity index 100% rename from pymllm/executor/eager_runner.py rename to pymllm/executor/model_runner.py diff --git a/pymllm/layers/__init__.py b/pymllm/layers/__init__.py index fd9a070e..97cfb921 100644 --- a/pymllm/layers/__init__.py +++ b/pymllm/layers/__init__.py @@ -13,6 +13,20 @@ apply_rope_pos_ids, apply_rope_with_cos_sin_cache, ) +from pymllm.layers.sampling import ( + chain_speculative_sampling, + min_p_sampling_from_probs, + sampling_from_logits, + sampling_from_probs, + softmax, + top_k_mask_logits, + top_k_renorm_probs, + top_k_sampling_from_probs, + top_k_top_p_sampling_from_logits, + top_k_top_p_sampling_from_probs, + top_p_renorm_probs, + top_p_sampling_from_probs, +) from pymllm.layers.utils import set_weight_attrs __all__ = [ @@ -32,4 +46,16 @@ "apply_rope_pos_ids", 
"apply_llama31_rope_pos_ids", "apply_rope_with_cos_sin_cache", + "softmax", + "sampling_from_probs", + "sampling_from_logits", + "top_p_sampling_from_probs", + "top_k_sampling_from_probs", + "min_p_sampling_from_probs", + "top_k_top_p_sampling_from_logits", + "top_k_top_p_sampling_from_probs", + "top_p_renorm_probs", + "top_k_renorm_probs", + "top_k_mask_logits", + "chain_speculative_sampling", ] diff --git a/pymllm/layers/sampling.py b/pymllm/layers/sampling.py index e69de29b..ff84879c 100644 --- a/pymllm/layers/sampling.py +++ b/pymllm/layers/sampling.py @@ -0,0 +1,767 @@ +"""Sampling operations with FlashInfer acceleration and PyTorch fallback. + +This module wraps all flashinfer.sampling APIs and provides pure-PyTorch +fallback implementations so that the rest of the codebase can import from +here without worrying about whether FlashInfer is installed. +""" + +from __future__ import annotations + +import logging +from typing import Optional, Tuple, Union + +import torch + +logger = logging.getLogger(__name__) + +try: + import flashinfer.sampling as _fi_sampling + + _HAS_FLASHINFER = True +except ImportError: + _HAS_FLASHINFER = False + logger.warning("flashinfer not found, falling back to PyTorch sampling kernels") + + +# --------------------------------------------------------------------------- +# Helper utilities (torch fallback) +# --------------------------------------------------------------------------- + + +def _resolve_indices( + data: torch.Tensor, indices: Optional[torch.Tensor] +) -> torch.Tensor: + """If *indices* is given, gather rows from *data* accordingly.""" + if indices is None: + return data + return data[indices.long()] + + +def _to_scalar_or_tensor( + value: Union[torch.Tensor, float, int], + batch_size: int, + device: torch.device, +) -> torch.Tensor: + """Broadcast a scalar or per-batch tensor to shape ``(batch_size,)``.""" + if isinstance(value, (int, float)): + return torch.full((batch_size,), value, device=device, dtype=torch.float32) 
+ return value.to(device=device, dtype=torch.float32) + + +# --------------------------------------------------------------------------- +# softmax +# --------------------------------------------------------------------------- + + +def softmax( + logits: torch.Tensor, + temperature: Optional[Union[torch.Tensor, float]] = None, + enable_pdl: Optional[bool] = None, +) -> torch.Tensor: + """Safe softmax with optional temperature scaling. + + Parameters + ---------- + logits : torch.Tensor + Shape ``(batch_size, num_classes)``. + temperature : Optional[Union[torch.Tensor, float]] + Scalar or per-request ``(batch_size,)`` temperature. + enable_pdl : Optional[bool] + FlashInfer PDL flag (ignored in fallback). + + Returns + ------- + torch.Tensor + Probabilities with the same shape as *logits*. + """ + if _HAS_FLASHINFER: + return _fi_sampling.softmax( + logits, temperature=temperature, enable_pdl=enable_pdl + ) + + if temperature is not None: + if isinstance(temperature, (int, float)): + logits = logits / temperature + else: + logits = logits / temperature.unsqueeze(-1) + return torch.softmax(logits, dim=-1) + + +# --------------------------------------------------------------------------- +# sampling_from_probs +# --------------------------------------------------------------------------- + + +def sampling_from_probs( + probs: torch.Tensor, + indices: Optional[torch.Tensor] = None, + deterministic: bool = True, + generator: Optional[torch.Generator] = None, + check_nan: bool = False, + seed: Optional[int] = None, + offset: Optional[int] = None, +) -> torch.Tensor: + """Category sampling from probabilities. + + Parameters + ---------- + probs : torch.Tensor + ``(batch_size, num_classes)`` or ``(unique_batch_size, num_classes)`` + when *indices* is provided. + indices : Optional[torch.Tensor] + Maps each output to a row in *probs*. + deterministic, generator, check_nan, seed, offset + See FlashInfer docs. 
+ + Returns + ------- + torch.Tensor + Sampled token ids, shape ``(batch_size,)``. + """ + if _HAS_FLASHINFER: + return _fi_sampling.sampling_from_probs( + probs, + indices=indices, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + seed=seed, + offset=offset, + ) + + p = _resolve_indices(probs, indices) + samples = torch.multinomial(p.float(), num_samples=1, generator=generator).squeeze( + -1 + ) + return samples.to(torch.int32) + + +# --------------------------------------------------------------------------- +# sampling_from_logits +# --------------------------------------------------------------------------- + + +def sampling_from_logits( + logits: torch.Tensor, + indices: Optional[torch.Tensor] = None, + deterministic: bool = True, + generator: Optional[torch.Generator] = None, + check_nan: bool = False, + seed: Optional[int] = None, + offset: Optional[int] = None, +) -> torch.Tensor: + """Category sampling from logits (applies softmax internally). + + Parameters + ---------- + logits : torch.Tensor + ``(batch_size, num_classes)``. + indices, deterministic, generator, check_nan, seed, offset + See FlashInfer docs. + + Returns + ------- + torch.Tensor + Sampled token ids, shape ``(batch_size,)``. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.sampling_from_logits( + logits, + indices=indices, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + seed=seed, + offset=offset, + ) + + probs = torch.softmax(logits.float(), dim=-1) + return sampling_from_probs( + probs, + indices=indices, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + ) + + +# --------------------------------------------------------------------------- +# top_p_sampling_from_probs +# --------------------------------------------------------------------------- + + +def top_p_sampling_from_probs( + probs: torch.Tensor, + top_p: Union[torch.Tensor, float], + indices: Optional[torch.Tensor] = None, + deterministic: bool = True, + generator: Optional[torch.Generator] = None, + check_nan: bool = False, + seed: Optional[int] = None, + offset: Optional[int] = None, +) -> torch.Tensor: + """Top-p (nucleus) sampling from probabilities. + + Parameters + ---------- + probs : torch.Tensor + ``(batch_size, num_classes)``. + top_p : Union[torch.Tensor, float] + Top-p threshold. + indices, deterministic, generator, check_nan, seed, offset + See FlashInfer docs. + + Returns + ------- + torch.Tensor + Sampled token ids, shape ``(batch_size,)``. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.top_p_sampling_from_probs( + probs, + top_p, + indices=indices, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + seed=seed, + offset=offset, + ) + + p = _resolve_indices(probs, indices).float() + renormed = _torch_top_p_renorm_probs(p, top_p) + samples = torch.multinomial(renormed, num_samples=1, generator=generator).squeeze( + -1 + ) + return samples.to(torch.int32) + + +# --------------------------------------------------------------------------- +# top_k_sampling_from_probs +# --------------------------------------------------------------------------- + + +def top_k_sampling_from_probs( + probs: torch.Tensor, + top_k: Union[torch.Tensor, int], + indices: Optional[torch.Tensor] = None, + deterministic: bool = True, + generator: Optional[torch.Generator] = None, + check_nan: bool = False, + seed: Optional[int] = None, + offset: Optional[int] = None, +) -> torch.Tensor: + """Top-k sampling from probabilities. + + Parameters + ---------- + probs : torch.Tensor + ``(batch_size, num_classes)``. + top_k : Union[torch.Tensor, int] + Top-k threshold. + indices, deterministic, generator, check_nan, seed, offset + See FlashInfer docs. + + Returns + ------- + torch.Tensor + Sampled token ids, shape ``(batch_size,)``. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.top_k_sampling_from_probs( + probs, + top_k, + indices=indices, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + seed=seed, + offset=offset, + ) + + p = _resolve_indices(probs, indices).float() + renormed = _torch_top_k_renorm_probs(p, top_k) + samples = torch.multinomial(renormed, num_samples=1, generator=generator).squeeze( + -1 + ) + return samples.to(torch.int32) + + +# --------------------------------------------------------------------------- +# min_p_sampling_from_probs +# --------------------------------------------------------------------------- + + +def min_p_sampling_from_probs( + probs: torch.Tensor, + min_p: Union[torch.Tensor, float], + indices: Optional[torch.Tensor] = None, + deterministic: bool = True, + generator: Optional[torch.Generator] = None, + check_nan: bool = False, + seed: Optional[int] = None, + offset: Optional[int] = None, +) -> torch.Tensor: + """Min-p sampling from probabilities. + + Parameters + ---------- + probs : torch.Tensor + ``(batch_size, num_classes)``. + min_p : Union[torch.Tensor, float] + Min-p threshold. + indices, deterministic, generator, check_nan, seed, offset + See FlashInfer docs. + + Returns + ------- + torch.Tensor + Sampled token ids, shape ``(batch_size,)``. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.min_p_sampling_from_probs( + probs, + min_p, + indices=indices, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + seed=seed, + offset=offset, + ) + + p = _resolve_indices(probs, indices).float() + batch_size = p.shape[0] + min_p_t = _to_scalar_or_tensor(min_p, batch_size, p.device) + # min-p: keep tokens whose probability >= min_p * max_prob + max_probs = p.max(dim=-1, keepdim=True).values # (B,1) + threshold = min_p_t.unsqueeze(-1) * max_probs # (B,1) + mask = p < threshold + filtered = p.clone() + filtered[mask] = 0.0 + # renormalize + sums = filtered.sum(dim=-1, keepdim=True) + sums = sums.clamp(min=1e-8) + filtered = filtered / sums + samples = torch.multinomial(filtered, num_samples=1, generator=generator).squeeze( + -1 + ) + return samples.to(torch.int32) + + +# --------------------------------------------------------------------------- +# top_k_top_p_sampling_from_logits +# --------------------------------------------------------------------------- + + +def top_k_top_p_sampling_from_logits( + logits: torch.Tensor, + top_k: Union[torch.Tensor, int], + top_p: Union[torch.Tensor, float], + indices: Optional[torch.Tensor] = None, + filter_apply_order: str = "top_k_first", + deterministic: bool = True, + generator: Optional[torch.Generator] = None, + check_nan: bool = False, + seed: Optional[int] = None, + offset: Optional[int] = None, +) -> torch.Tensor: + """Top-k + top-p sampling from pre-softmax logits. + + Parameters + ---------- + logits : torch.Tensor + ``(batch_size, num_classes)``. + top_k : Union[torch.Tensor, int] + top_p : Union[torch.Tensor, float] + filter_apply_order : str + ``"top_k_first"`` or ``"joint"``. + indices, deterministic, generator, check_nan, seed, offset + See FlashInfer docs. + + Returns + ------- + torch.Tensor + Sampled token ids, shape ``(batch_size,)``. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.top_k_top_p_sampling_from_logits( + logits, + top_k, + top_p, + indices=indices, + filter_apply_order=filter_apply_order, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + seed=seed, + offset=offset, + ) + + probs = torch.softmax(logits.float(), dim=-1) + return top_k_top_p_sampling_from_probs( + probs, + top_k, + top_p, + indices=indices, + filter_apply_order=filter_apply_order, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + ) + + +# --------------------------------------------------------------------------- +# top_k_top_p_sampling_from_probs +# --------------------------------------------------------------------------- + + +def top_k_top_p_sampling_from_probs( + probs: torch.Tensor, + top_k: Union[torch.Tensor, int], + top_p: Union[torch.Tensor, float], + indices: Optional[torch.Tensor] = None, + filter_apply_order: str = "top_k_first", + deterministic: bool = True, + generator: Optional[torch.Generator] = None, + check_nan: bool = False, + seed: Optional[int] = None, + offset: Optional[int] = None, +) -> torch.Tensor: + """Top-k + top-p sampling from probabilities. + + Parameters + ---------- + probs : torch.Tensor + ``(batch_size, num_classes)``. + top_k : Union[torch.Tensor, int] + top_p : Union[torch.Tensor, float] + filter_apply_order : str + ``"top_k_first"`` or ``"joint"``. + indices, deterministic, generator, check_nan, seed, offset + See FlashInfer docs. + + Returns + ------- + torch.Tensor + Sampled token ids, shape ``(batch_size,)``. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.top_k_top_p_sampling_from_probs( + probs, + top_k, + top_p, + indices=indices, + filter_apply_order=filter_apply_order, + deterministic=deterministic, + generator=generator, + check_nan=check_nan, + seed=seed, + offset=offset, + ) + + p = _resolve_indices(probs, indices).float() + if filter_apply_order == "top_k_first": + p = _torch_top_k_renorm_probs(p, top_k) + p = _torch_top_p_renorm_probs(p, top_p) + else: + # joint: apply both filters simultaneously + p = _torch_top_k_renorm_probs(p, top_k) + p = _torch_top_p_renorm_probs(p, top_p) + samples = torch.multinomial(p, num_samples=1, generator=generator).squeeze(-1) + return samples.to(torch.int32) + + +# --------------------------------------------------------------------------- +# top_p_renorm_probs +# --------------------------------------------------------------------------- + + +def top_p_renorm_probs( + probs: torch.Tensor, + top_p: Union[torch.Tensor, float], +) -> torch.Tensor: + """Renormalize probabilities by top-p thresholding. + + Parameters + ---------- + probs : torch.Tensor + ``(batch_size, num_classes)``. + top_p : Union[torch.Tensor, float] + Top-p threshold in ``(0, 1)``. + + Returns + ------- + torch.Tensor + Renormalized probabilities. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.top_p_renorm_probs(probs, top_p) + + return _torch_top_p_renorm_probs(probs.float(), top_p).to(probs.dtype) + + +def _torch_top_p_renorm_probs( + probs: torch.Tensor, + top_p: Union[torch.Tensor, float], +) -> torch.Tensor: + """Pure-torch top-p renormalization (operates on float32).""" + sorted_probs, sorted_indices = torch.sort(probs, dim=-1, descending=True) + cumsum = torch.cumsum(sorted_probs, dim=-1) + + if isinstance(top_p, (int, float)): + mask = cumsum - sorted_probs > top_p + else: + top_p_t = top_p.unsqueeze(-1) + mask = cumsum - sorted_probs > top_p_t + + sorted_probs[mask] = 0.0 + # scatter back + result = torch.zeros_like(probs) + result.scatter_(1, sorted_indices, sorted_probs) + # renormalize + sums = result.sum(dim=-1, keepdim=True).clamp(min=1e-8) + return result / sums + + +# --------------------------------------------------------------------------- +# top_k_renorm_probs +# --------------------------------------------------------------------------- + + +def top_k_renorm_probs( + probs: torch.Tensor, + top_k: Union[torch.Tensor, int], +) -> torch.Tensor: + """Renormalize probabilities by top-k thresholding. + + Parameters + ---------- + probs : torch.Tensor + ``(batch_size, num_classes)``. + top_k : Union[torch.Tensor, int] + Top-k threshold. + + Returns + ------- + torch.Tensor + Renormalized probabilities. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.top_k_renorm_probs(probs, top_k) + + return _torch_top_k_renorm_probs(probs.float(), top_k).to(probs.dtype) + + +def _torch_top_k_renorm_probs( + probs: torch.Tensor, + top_k: Union[torch.Tensor, int], +) -> torch.Tensor: + """Pure-torch top-k renormalization (operates on float32).""" + if isinstance(top_k, int): + # uniform top_k across batch + topk_vals, _ = torch.topk(probs, top_k, dim=-1) + threshold = topk_vals[:, -1:] # (B, 1) + else: + # per-request top_k: use sorting + sorted_probs, _ = torch.sort(probs, dim=-1, descending=True) + # gather the k-th value for each row + k_indices = (top_k.long() - 1).unsqueeze(-1) # (B, 1) + threshold = sorted_probs.gather(1, k_indices) # (B, 1) + + mask = probs < threshold + filtered = probs.clone() + filtered[mask] = 0.0 + sums = filtered.sum(dim=-1, keepdim=True).clamp(min=1e-8) + return filtered / sums + + +# --------------------------------------------------------------------------- +# top_k_mask_logits +# --------------------------------------------------------------------------- + + +def top_k_mask_logits( + logits: torch.Tensor, + top_k: Union[torch.Tensor, int], +) -> torch.Tensor: + """Mask logits by top-k thresholding (set non-top-k to -inf). + + Parameters + ---------- + logits : torch.Tensor + ``(batch_size, num_classes)``. + top_k : Union[torch.Tensor, int] + Top-k threshold. + + Returns + ------- + torch.Tensor + Masked logits with the same shape and dtype. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.top_k_mask_logits(logits, top_k) + + if isinstance(top_k, int): + topk_vals, _ = torch.topk(logits, top_k, dim=-1) + threshold = topk_vals[:, -1:] + else: + sorted_logits, _ = torch.sort(logits, dim=-1, descending=True) + k_indices = (top_k.long() - 1).unsqueeze(-1) + threshold = sorted_logits.gather(1, k_indices) + + mask = logits < threshold + result = logits.clone() + result[mask] = float("-inf") + return result + + +# --------------------------------------------------------------------------- +# chain_speculative_sampling +# --------------------------------------------------------------------------- + + +def chain_speculative_sampling( + draft_probs: torch.Tensor, + draft_token_ids: torch.Tensor, + target_probs: torch.Tensor, + maybe_output_accepted_token_num: Optional[torch.Tensor] = None, + maybe_output_emitted_draft_token_num: Optional[torch.Tensor] = None, + deterministic: bool = True, + generator: Optional[torch.Generator] = None, + seed: Optional[int] = None, + offset: Optional[int] = None, +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Speculative sampling for sequence generation. + + Parameters + ---------- + draft_probs : torch.Tensor + ``(batch_size, num_speculate_tokens, vocab_size)``. + draft_token_ids : torch.Tensor + ``(batch_size, num_speculate_tokens)``. + target_probs : torch.Tensor + ``(batch_size, num_speculate_tokens + 1, vocab_size)``. + maybe_output_accepted_token_num : Optional[torch.Tensor] + If provided, accepted counts are added in-place. + maybe_output_emitted_draft_token_num : Optional[torch.Tensor] + If provided, emitted counts are added in-place. + deterministic, generator, seed, offset + See FlashInfer docs. + + Returns + ------- + output_token_ids : torch.Tensor + ``(batch_size, num_speculate_tokens + 1)``, rejected slots padded with -1. + output_accepted_token_num : torch.Tensor + ``(batch_size,)``. + output_emitted_draft_token_num : torch.Tensor + ``(batch_size,)``. 
+ """ + if _HAS_FLASHINFER: + return _fi_sampling.chain_speculative_sampling( + draft_probs, + draft_token_ids, + target_probs, + maybe_output_accepted_token_num=maybe_output_accepted_token_num, + maybe_output_emitted_draft_token_num=maybe_output_emitted_draft_token_num, + deterministic=deterministic, + generator=generator, + seed=seed, + offset=offset, + ) + + return _torch_chain_speculative_sampling( + draft_probs, + draft_token_ids, + target_probs, + maybe_output_accepted_token_num, + maybe_output_emitted_draft_token_num, + generator, + ) + + +def _torch_chain_speculative_sampling( + draft_probs: torch.Tensor, + draft_token_ids: torch.Tensor, + target_probs: torch.Tensor, + maybe_output_accepted_token_num: Optional[torch.Tensor], + maybe_output_emitted_draft_token_num: Optional[torch.Tensor], + generator: Optional[torch.Generator], +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Pure-torch chain speculative sampling. + + Implements the rejection-sampling algorithm from + "Accelerating Large Language Model Decoding with Speculative Sampling" + (Leviathan et al., 2023). 
+ """ + batch_size, num_spec, vocab_size = draft_probs.shape + device = draft_probs.device + + output_ids = torch.full( + (batch_size, num_spec + 1), -1, dtype=torch.int32, device=device + ) + accepted_count = torch.zeros(batch_size, dtype=torch.int32, device=device) + emitted_count = torch.zeros(batch_size, dtype=torch.int32, device=device) + + for b in range(batch_size): + all_accepted = True + for t in range(num_spec): + draft_tok = draft_token_ids[b, t].item() + p_draft = draft_probs[b, t, draft_tok].item() + p_target = target_probs[b, t, draft_tok].item() + + # independent acceptance check (for the metric) + if p_target >= p_draft: + accepted_count[b] += 1 + else: + r = torch.rand(1, generator=generator, device=device).item() + if r < p_target / max(p_draft, 1e-10): + accepted_count[b] += 1 + + # sequential chain: accept / reject + if all_accepted: + r = torch.rand(1, generator=generator, device=device).item() + if r < min(1.0, p_target / max(p_draft, 1e-10)): + output_ids[b, t] = draft_tok + emitted_count[b] += 1 + else: + # reject: sample from max(0, p_target - p_draft) + diff = target_probs[b, t].float() - draft_probs[b, t].float() + diff = torch.clamp(diff, min=0.0) + dsum = diff.sum() + if dsum > 1e-8: + diff = diff / dsum + else: + diff = target_probs[b, t].float() + diff = diff / diff.sum().clamp(min=1e-8) + resampled = torch.multinomial( + diff.unsqueeze(0), num_samples=1, generator=generator + ).item() + output_ids[b, t] = resampled + emitted_count[b] += 1 + all_accepted = False + + # bonus token (sampled from target at position after last emitted) + if all_accepted: + pos = num_spec + bonus_probs = target_probs[b, pos].float() + bonus_probs = bonus_probs / bonus_probs.sum().clamp(min=1e-8) + bonus = torch.multinomial( + bonus_probs.unsqueeze(0), num_samples=1, generator=generator + ).item() + output_ids[b, num_spec] = bonus + + if maybe_output_accepted_token_num is not None: + maybe_output_accepted_token_num.add_(accepted_count) + if 
maybe_output_emitted_draft_token_num is not None: + maybe_output_emitted_draft_token_num.add_(emitted_count) + + return output_ids, accepted_count, emitted_count + + +# --------------------------------------------------------------------------- +# Aliases (FlashInfer also exposes these) +# --------------------------------------------------------------------------- +top_p_renorm_prob = top_p_renorm_probs +top_k_renorm_prob = top_k_renorm_probs From 2cf50f40c760137ad439ea45afc73f61bac18c5a Mon Sep 17 00:00:00 2001 From: chenghuaWang <2923277184@qq.com> Date: Mon, 9 Mar 2026 07:48:45 +0000 Subject: [PATCH 13/13] feat(cuda): add fused GDN decode and RMSNorm+SiLU gating kernels for attention - Implement fused GDN decode kernel performing gating, L2 normalization, delta update, and output computation in a single CUDA kernel optimized for SM80+ architectures - Add fused RMSNorm with optional SiLU gating kernel for Qwen3.5 GDN attention - Provide JIT Python wrappers for both kernels to enable easy integration - Extend CUDA JIT module imports to include new gdn_decode kernel - Update global and server configs with new backend options and default values - Enhance argument parsing in global_config to support literal choice constraints - Add fields for multimodal M-RoPE and vision inputs in ForwardBatch for decoding - Implement EOS token ID extraction and max output tokens normalization in launcher module --- .gitignore | 1 + .../mllm_kernel/cuda/csrc/gdn_decode.cuh | 432 ++++++ .../mllm_kernel/cuda/csrc/rms_norm_gated.cuh | 212 +++ mllm-kernel/mllm_kernel/cuda/jit/__init__.py | 3 +- .../mllm_kernel/cuda/jit/gdn_decode.py | 114 ++ .../mllm_kernel/cuda/jit/rms_norm_gated.py | 87 ++ pymllm/configs/global_config.py | 31 +- pymllm/configs/server_config.py | 23 +- pymllm/engine/forward_batch.py | 9 + pymllm/engine/launch.py | 206 ++- pymllm/executor/__init__.py | 10 + pymllm/executor/cuda_graph_runner.py | 590 ++++++++ pymllm/executor/model_runner.py | 1198 +++++++++++++++ 
pymllm/layers/__init__.py | 4 + pymllm/layers/attention/__init__.py | 8 + pymllm/layers/attention/attention_backend.py | 22 + pymllm/layers/attention/gdn_backend.py | 660 ++++++++ pymllm/layers/attention/hybrid_backend.py | 184 +++ .../attention/radix_linear_attention.py | 116 ++ pymllm/layers/gated_delta_net.py | 168 +++ pymllm/layers/rms_norm.py | 24 +- pymllm/layers/rms_norm_gated.py | 154 ++ pymllm/layers/rope.py | 147 +- pymllm/layers/sampling.py | 9 + pymllm/mem_cache/memory_pool.py | 159 ++ pymllm/mem_cache/radix_cache.py | 40 +- pymllm/models/__init__.py | 62 + pymllm/models/qwen3_5.py | 530 +++++++ pymllm/models/qwen3_vl.py | 1329 +++++++++++++++++ pymllm/orchestrator/async_disk_io_process.py | 84 -- pymllm/orchestrator/detokenizer_process.py | 116 +- pymllm/orchestrator/ipc_utils.py | 22 + pymllm/orchestrator/model_runner_process.py | 1007 +++++++++++-- .../orchestrator/request_response_process.py | 23 +- pymllm/orchestrator/scheduler_process.py | 820 +++++++++- pymllm/orchestrator/tokenizer_process.py | 4 +- pymllm/parsers/__init__.py | 10 + pymllm/parsers/reasoning_parser.py | 212 +++ pymllm/parsers/tool_call_parser.py | 433 ++++++ pymllm/server/launch.py | 923 +++++++++++- 40 files changed, 9810 insertions(+), 376 deletions(-) create mode 100644 mllm-kernel/mllm_kernel/cuda/csrc/gdn_decode.cuh create mode 100644 mllm-kernel/mllm_kernel/cuda/csrc/rms_norm_gated.cuh create mode 100644 mllm-kernel/mllm_kernel/cuda/jit/gdn_decode.py create mode 100644 mllm-kernel/mllm_kernel/cuda/jit/rms_norm_gated.py create mode 100644 pymllm/layers/attention/gdn_backend.py create mode 100644 pymllm/layers/attention/hybrid_backend.py create mode 100644 pymllm/layers/attention/radix_linear_attention.py create mode 100644 pymllm/layers/gated_delta_net.py create mode 100644 pymllm/layers/rms_norm_gated.py create mode 100644 pymllm/models/qwen3_5.py create mode 100644 pymllm/models/qwen3_vl.py delete mode 100644 pymllm/orchestrator/async_disk_io_process.py create mode 100644 
pymllm/parsers/__init__.py create mode 100644 pymllm/parsers/reasoning_parser.py create mode 100644 pymllm/parsers/tool_call_parser.py diff --git a/.gitignore b/.gitignore index cdafc270..7f14b37e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ .cache/ .tmp/ compile_commands.json +settings.local.json # MLLM Team Specific tasks/mllmteam* diff --git a/mllm-kernel/mllm_kernel/cuda/csrc/gdn_decode.cuh b/mllm-kernel/mllm_kernel/cuda/csrc/gdn_decode.cuh new file mode 100644 index 00000000..4c2833c0 --- /dev/null +++ b/mllm-kernel/mllm_kernel/cuda/csrc/gdn_decode.cuh @@ -0,0 +1,432 @@ +// Fused GDN (Gated Delta Net) decode kernel for linear attention. +// +// Performs a single-token recurrent update per request: +// g = -exp(A_log) * softplus(a + dt_bias) +// beta = sigmoid(b) +// q = L2norm(q) * scale +// k = L2norm(k) +// state *= exp(g) (decay) +// v_delta = v - state @ k (delta rule) +// v_delta *= beta (gated update) +// state += v_delta outer k (state update) +// output = state @ q (readout) +// +// Works on SM80+ (Ampere, Jetson Orin, Hopper, ...). +// Matches the algorithm of sglang's fused_sigmoid_gating_delta_rule_update. +// +// Grid : (NV, bs * HV) where NV = ceil(V / BV) +// Block: BLOCK_K threads (one thread per K-dimension element) +// +// Each thread owns BV state elements at its K position. +// Two cross-thread reductions (over K) compute delta and output dot products. 
+ +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +#include + +namespace GDNDecodeKernel { + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +inline constexpr int BV = 32; // V-dimension tile size + +// --------------------------------------------------------------------------- +// Warp-level reduction +// --------------------------------------------------------------------------- + +__device__ __forceinline__ float warp_reduce_sum(float val) { + #pragma unroll + for (int offset = 16; offset > 0; offset >>= 1) { + val += __shfl_xor_sync(0xffffffff, val, offset); + } + return val; +} + +// --------------------------------------------------------------------------- +// Type conversion helpers +// --------------------------------------------------------------------------- + +template +__device__ __forceinline__ float to_float(T val); + +template <> +__device__ __forceinline__ float to_float<__half>(__half val) { + return __half2float(val); +} + +template <> +__device__ __forceinline__ float to_float<__nv_bfloat16>(__nv_bfloat16 val) { + return __bfloat162float(val); +} + +template <> +__device__ __forceinline__ float to_float(float val) { + return val; +} + +template +__device__ __forceinline__ T from_float(float val); + +template <> +__device__ __forceinline__ __half from_float<__half>(float val) { + return __float2half(val); +} + +template <> +__device__ __forceinline__ __nv_bfloat16 from_float<__nv_bfloat16>(float val) { + return __float2bfloat16(val); +} + +template <> +__device__ __forceinline__ float from_float(float val) { + return val; +} + +// --------------------------------------------------------------------------- +// Block-level scalar reduction (sum across all threads → broadcast result) +// --------------------------------------------------------------------------- + +// Reduces a 
scalar across all threads in the block. +// Returns the sum in ALL threads (via shared memory broadcast). +// smem must have at least (blockDim.x / 32) floats. +__device__ __forceinline__ float block_reduce_sum(float val, float* smem) { + const int warp_id = threadIdx.x / 32; + const int lane_id = threadIdx.x % 32; + const int num_warps = blockDim.x / 32; + + val = warp_reduce_sum(val); + if (lane_id == 0) smem[warp_id] = val; + __syncthreads(); + + // First warp reduces across warps + if (warp_id == 0) { + float v = (lane_id < num_warps) ? smem[lane_id] : 0.0f; + v = warp_reduce_sum(v); + if (lane_id == 0) smem[0] = v; + } + __syncthreads(); + return smem[0]; +} + +// --------------------------------------------------------------------------- +// Block-level vector reduction: BV independent sums across all K threads +// --------------------------------------------------------------------------- + +// Each thread contributes partial[0..BV-1]. After this call, the results +// are written to out[0..BV-1] and are valid in all threads. +// reduce_buf must have at least BV * num_warps floats. +// broadcast_buf must have at least BV floats. 
+__device__ __forceinline__ void block_reduce_bv( + float partial[BV], + float* reduce_buf, // [num_warps * BV] + float* broadcast_buf, // [BV] + float out[BV] +) { + const int warp_id = threadIdx.x / 32; + const int lane_id = threadIdx.x % 32; + const int num_warps = blockDim.x / 32; + + // Intra-warp reduction for each bv + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + float val = warp_reduce_sum(partial[bv]); + if (lane_id == 0) { + reduce_buf[warp_id * BV + bv] = val; + } + } + __syncthreads(); + + // Inter-warp reduction: threads 0..BV-1 each reduce one bv + if (threadIdx.x < BV) { + float sum = 0.0f; + #pragma unroll 8 + for (int w = 0; w < num_warps; w++) { + sum += reduce_buf[w * BV + threadIdx.x]; + } + broadcast_buf[threadIdx.x] = sum; + } + __syncthreads(); + + // Broadcast to all threads + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + out[bv] = broadcast_buf[bv]; + } +} + +// --------------------------------------------------------------------------- +// Main GDN decode kernel +// --------------------------------------------------------------------------- + +template +__global__ void gdn_decode_kernel( + const T* __restrict__ q_ptr, // [bs, H, K] + const T* __restrict__ k_ptr, // [bs, H, K] + const T* __restrict__ v_ptr, // [bs, HV, V] + const T* __restrict__ a_ptr, // [bs, HV] + const T* __restrict__ b_ptr, // [bs, HV] + const float* __restrict__ A_log_ptr, // [HV] + const float* __restrict__ dt_bias_ptr, // [HV] + float* __restrict__ state_pool, // [pool_size, HV, V, K] + const int64_t* __restrict__ cache_indices, // [bs] + T* __restrict__ output_ptr, // [bs, HV, V] + const int bs, + const int H, // num_k_heads + const int HV, // num_v_heads + const int K, // head_k_dim + const int V, // head_v_dim + const float scale // K^-0.5 +) { + // Block indices + const int bv_block = blockIdx.x; // V-tile index + const int batch_head = blockIdx.y; // batch * HV + const int i_n = batch_head / HV; // batch index + const int i_hv = batch_head % HV; 
// value head index + const int i_h = i_hv * H / HV; // key head index (GQA mapping) + const int k_idx = threadIdx.x; // K-dimension index + const int v_start = bv_block * BV; // V-dimension start + + if (i_n >= bs) return; + + // Shared memory layout (declared dynamically) + extern __shared__ float smem[]; + const int num_warps = BLOCK_K / 32; + float* sq = smem; // [BLOCK_K] + float* sk = smem + BLOCK_K; // [BLOCK_K] + float* sv_broadcast = smem + 2 * BLOCK_K; // [BV] + float* warp_buf = smem + 2 * BLOCK_K + BV; // [num_warps] + float* reduce_buf = smem + 2 * BLOCK_K + BV + num_warps; // [BV * num_warps] + + // ===== 1. Load gating parameters and compute decay + beta ===== + // All threads load the same scalars (cheap, avoids shared memory) + const float A_log_val = A_log_ptr[i_hv]; + const float dt_bias_val = dt_bias_ptr[i_hv]; + const float a_val = to_float(a_ptr[i_n * HV + i_hv]); + const float b_val = to_float(b_ptr[i_n * HV + i_hv]); + + const float x = a_val + dt_bias_val; + // softplus with numerical stability: softplus(x) = log(1+exp(x)), or x for x>20 + const float softplus_x = (x <= 20.0f) ? logf(1.0f + expf(x)) : x; + const float g = -expf(A_log_val) * softplus_x; + const float decay = expf(g); + const float beta = 1.0f / (1.0f + expf(-b_val)); + + // ===== 2. 
Load q, k and compute L2 norms ===== + float q_val = 0.0f, k_val = 0.0f; + if (k_idx < K) { + q_val = to_float(q_ptr[i_n * H * K + i_h * K + k_idx]); + k_val = to_float(k_ptr[i_n * H * K + i_h * K + k_idx]); + } + + // L2 norm: reduce q*q and k*k across block + float q_sq_sum = block_reduce_sum(q_val * q_val, warp_buf); + float k_sq_sum = block_reduce_sum(k_val * k_val, warp_buf); + + float q_norm = rsqrtf(q_sq_sum + 1e-6f); + float k_norm = rsqrtf(k_sq_sum + 1e-6f); + + // Store normalized q (scaled) and k in shared memory + if (k_idx < K) { + sq[k_idx] = q_val * q_norm * scale; + sk[k_idx] = k_val * k_norm; + } else { + sq[k_idx] = 0.0f; + sk[k_idx] = 0.0f; + } + __syncthreads(); + + // ===== 3. Load state elements for this thread ===== + const int64_t pool_idx = cache_indices[i_n]; + // state_pool layout: [pool_size, HV, V, K] + const int64_t state_base = pool_idx * HV * V * K + i_hv * V * K; + + float state[BV]; + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + const int v_idx = v_start + bv; + if (v_idx < V && k_idx < K) { + state[bv] = state_pool[state_base + (int64_t)v_idx * K + k_idx]; + } else { + state[bv] = 0.0f; + } + } + + // ===== 4. Decay: state *= exp(g) ===== + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + state[bv] *= decay; + } + + // ===== 5. Delta: v_delta[bv] = v[bv] - sum_k(state[bv,k] * k_norm[k]) ===== + float partial_delta[BV]; + const float my_k = sk[k_idx]; + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + partial_delta[bv] = state[bv] * my_k; + } + + float delta[BV]; + block_reduce_bv(partial_delta, reduce_buf, sv_broadcast, delta); + + // Compute v_delta = (v - delta) * beta and broadcast to all threads. + // Threads 0..BV-1 each load one v element, compute v_delta, write to smem. + if (k_idx < BV) { + const int my_v_idx = v_start + k_idx; + float my_v = (my_v_idx < V) + ? 
to_float(v_ptr[i_n * HV * V + i_hv * V + my_v_idx]) + : 0.0f; + sv_broadcast[k_idx] = (my_v - delta[k_idx]) * beta; + } + __syncthreads(); + + float v_delta[BV]; + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + v_delta[bv] = sv_broadcast[bv]; + } + + // ===== 6. State update: state[bv,k] += v_delta[bv] * k_norm[k] ===== + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + state[bv] += v_delta[bv] * my_k; + } + + // ===== 7. Output: o[bv] = sum_k(state[bv,k] * q_norm_scaled[k]) ===== + float partial_out[BV]; + const float my_q = sq[k_idx]; + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + partial_out[bv] = state[bv] * my_q; + } + + float out_vals[BV]; + block_reduce_bv(partial_out, reduce_buf, sv_broadcast, out_vals); + + // ===== 8. Store output ===== + // output layout: [bs, HV, V] + if (k_idx < BV) { + const int v_idx = v_start + k_idx; + if (v_idx < V) { + output_ptr[i_n * HV * V + i_hv * V + v_idx] = from_float(out_vals[k_idx]); + } + } + + // ===== 9. Store state back to pool ===== + #pragma unroll + for (int bv = 0; bv < BV; bv++) { + const int v_idx = v_start + bv; + if (v_idx < V && k_idx < K) { + state_pool[state_base + (int64_t)v_idx * K + k_idx] = state[bv]; + } + } +} + +// --------------------------------------------------------------------------- +// Launch wrapper (called via TVM FFI) +// --------------------------------------------------------------------------- + +void run( + tvm::ffi::TensorView q, // [bs, H, K] + tvm::ffi::TensorView k, // [bs, H, K] + tvm::ffi::TensorView v, // [bs, HV, V] + tvm::ffi::TensorView a, // [bs, HV] + tvm::ffi::TensorView b, // [bs, HV] + tvm::ffi::TensorView A_log, // [HV] + tvm::ffi::TensorView dt_bias, // [HV] + tvm::ffi::TensorView state_pool, // [pool_size, HV, V, K] + tvm::ffi::TensorView cache_indices, // [bs] + tvm::ffi::TensorView output // [bs, HV, V] +) { + using namespace mllm_kernel::host; + + // --- Extract dimensions --- + auto BS = SymbolicSize{"bs"}; + auto H_ = SymbolicSize{"H"}; + auto 
HV_ = SymbolicSize{"HV"}; + auto K_ = SymbolicSize{"K"}; + auto V_ = SymbolicSize{"V"}; + auto PS = SymbolicSize{"pool_size"}; + auto dtype = SymbolicDType{}; + auto device = SymbolicDevice{}; + device.set_options(); + dtype.set_options(); + + (void)TensorMatcher({BS, H_, K_}).with_dtype(dtype).with_device(device).verify(q); + (void)TensorMatcher({BS, H_, K_}).with_dtype(dtype).with_device(device).verify(k); + (void)TensorMatcher({BS, HV_, V_}).with_dtype(dtype).with_device(device).verify(v); + (void)TensorMatcher({BS, HV_}).with_dtype(dtype).with_device(device).verify(a); + (void)TensorMatcher({BS, HV_}).with_dtype(dtype).with_device(device).verify(b); + (void)TensorMatcher({HV_}).with_dtype().with_device(device).verify(A_log); + (void)TensorMatcher({HV_}).with_dtype().with_device(device).verify(dt_bias); + (void)TensorMatcher({PS, HV_, V_, K_}).with_dtype().with_device(device).verify(state_pool); + (void)TensorMatcher({BS}).with_device(device).verify(cache_indices); + (void)TensorMatcher({BS, HV_, V_}).with_dtype(dtype).with_device(device).verify(output); + + const int bs = static_cast(BS.unwrap()); + const int H = static_cast(H_.unwrap()); + const int HV = static_cast(HV_.unwrap()); + const int K = static_cast(K_.unwrap()); + const int V = static_cast(V_.unwrap()); + const float scale = 1.0f / sqrtf(static_cast(K)); + + // Block size = K (rounded up to warp multiple, max 1024) + int block_k = ((K + 31) / 32) * 32; + if (block_k > 1024) block_k = 1024; + const int num_warps = block_k / 32; + + // Grid + const int NV = (V + BV - 1) / BV; + dim3 grid(NV, bs * HV); + dim3 block(block_k); + + // Dynamic shared memory: sq[block_k] + sk[block_k] + sv[BV] + warp_buf[nw] + reduce[BV*nw] + const size_t smem_bytes = (2 * block_k + BV + num_warps + BV * num_warps) * sizeof(float); + + const DLDevice dl_device = device.unwrap(); + + // Typed launch helper + #define LAUNCH_GDN_DECODE(CType, BKVAL) \ + LaunchKernel(grid, block, dl_device, smem_bytes)( \ + gdn_decode_kernel, \ 
+ static_cast(q.data_ptr()), \ + static_cast(k.data_ptr()), \ + static_cast(v.data_ptr()), \ + static_cast(a.data_ptr()), \ + static_cast(b.data_ptr()), \ + static_cast(A_log.data_ptr()), \ + static_cast(dt_bias.data_ptr()), \ + static_cast(state_pool.data_ptr()), \ + static_cast(cache_indices.data_ptr()), \ + static_cast(output.data_ptr()), \ + bs, H, HV, K, V, scale \ + ) + + // Dispatch based on dtype and block size + if (dtype.is_type()) { + if (block_k == 64) { LAUNCH_GDN_DECODE(__nv_bfloat16, 64); } + else if (block_k == 128) { LAUNCH_GDN_DECODE(__nv_bfloat16, 128); } + else if (block_k == 256) { LAUNCH_GDN_DECODE(__nv_bfloat16, 256); } + else { LAUNCH_GDN_DECODE(__nv_bfloat16, 256); } + } else { + if (block_k == 64) { LAUNCH_GDN_DECODE(__half, 64); } + else if (block_k == 128) { LAUNCH_GDN_DECODE(__half, 128); } + else if (block_k == 256) { LAUNCH_GDN_DECODE(__half, 256); } + else { LAUNCH_GDN_DECODE(__half, 256); } + } + + #undef LAUNCH_GDN_DECODE +} + +} // namespace GDNDecodeKernel diff --git a/mllm-kernel/mllm_kernel/cuda/csrc/rms_norm_gated.cuh b/mllm-kernel/mllm_kernel/cuda/csrc/rms_norm_gated.cuh new file mode 100644 index 00000000..b6124602 --- /dev/null +++ b/mllm-kernel/mllm_kernel/cuda/csrc/rms_norm_gated.cuh @@ -0,0 +1,212 @@ +// Fused RMSNorm with optional SiLU gating for Qwen3.5 GDN attention. +// +// Computes: output = rmsnorm(x, weight, eps) * silu(z) (if z provided) +// output = rmsnorm(x, weight, eps) (if z is null) +// +// Where: rmsnorm(x) = x / sqrt(mean(x^2) + eps) * weight +// silu(z) = z * sigmoid(z) +// +// This kernel fuses both operations into a single pass over the data, +// maximizing memory bandwidth utilization. Each block processes one row +// (one token position). +// +// Supported dtypes: float16, bfloat16 (accumulation in float32). 
+ +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace RMSNormGatedKernel { + +// --------------------------------------------------------------------------- +// Warp-level reduction +// --------------------------------------------------------------------------- + +__device__ __forceinline__ float warp_reduce_sum(float val) { + #pragma unroll + for (int offset = 16; offset > 0; offset >>= 1) { + val += __shfl_xor_sync(0xffffffff, val, offset); + } + return val; +} + +// --------------------------------------------------------------------------- +// Type conversion helpers +// --------------------------------------------------------------------------- + +template +__device__ __forceinline__ float to_float(T val); + +template <> +__device__ __forceinline__ float to_float(half val) { + return __half2float(val); +} + +template <> +__device__ __forceinline__ float to_float<__nv_bfloat16>(__nv_bfloat16 val) { + return __bfloat162float(val); +} + +template <> +__device__ __forceinline__ float to_float(float val) { + return val; +} + +template +__device__ __forceinline__ T from_float(float val); + +template <> +__device__ __forceinline__ half from_float(float val) { + return __float2half(val); +} + +template <> +__device__ __forceinline__ __nv_bfloat16 from_float<__nv_bfloat16>(float val) { + return __float2bfloat16(val); +} + +template <> +__device__ __forceinline__ float from_float(float val) { + return val; +} + +// --------------------------------------------------------------------------- +// Main kernel +// --------------------------------------------------------------------------- + +template +__global__ void rms_norm_gated_kernel( + T* __restrict__ output, // [M, N] + const T* __restrict__ input, // [M, N] + const T* __restrict__ weight, // [N] + const T* __restrict__ gate, // [M, N] or nullptr + const int M, // number of rows + const int N, // number of columns (hidden_size) + const float eps +) { + const 
int row = blockIdx.x; + if (row >= M) return; + + const int tid = threadIdx.x; + const T* x_row = input + row * N; + T* out_row = output + row * N; + const T* z_row = (gate != nullptr) ? gate + row * N : nullptr; + + // --- Pass 1: compute sum of squares --- + float sum_sq = 0.0f; + for (int col = tid; col < N; col += BLOCK_SIZE) { + float val = to_float(x_row[col]); + sum_sq += val * val; + } + + // Block-level reduction + __shared__ float shared_sum[32]; // one per warp + int warp_id = tid / 32; + int lane_id = tid % 32; + + sum_sq = warp_reduce_sum(sum_sq); + if (lane_id == 0) { + shared_sum[warp_id] = sum_sq; + } + __syncthreads(); + + // Final reduction in first warp + if (warp_id == 0) { + float val = (lane_id < (BLOCK_SIZE / 32)) ? shared_sum[lane_id] : 0.0f; + val = warp_reduce_sum(val); + if (lane_id == 0) { + shared_sum[0] = val; + } + } + __syncthreads(); + + float rms = rsqrtf(shared_sum[0] / (float)N + eps); + + // --- Pass 2: normalize, scale by weight, optionally gate with silu(z) --- + for (int col = tid; col < N; col += BLOCK_SIZE) { + float val = to_float(x_row[col]); + float w = to_float(weight[col]); + + float normed = val * rms * w; + + if (z_row != nullptr) { + float z = to_float(z_row[col]); + // silu(z) = z * sigmoid(z) + float silu_z = z / (1.0f + expf(-z)); + normed *= silu_z; + } + + out_row[col] = from_float(normed); + } +} + +// --------------------------------------------------------------------------- +// Launch wrapper (called via TVM FFI) +// --------------------------------------------------------------------------- + +void run( + tvm::ffi::TensorView output, + tvm::ffi::TensorView input, + tvm::ffi::TensorView weight, + tvm::ffi::TensorView gate, // empty tensor (numel==0) means no gate + double eps +) { + using namespace mllm_kernel::host; + + auto M = SymbolicSize{"M"}; + auto N = SymbolicSize{"N"}; + auto dtype = SymbolicDType{}; + auto device = SymbolicDevice{}; + device.set_options(); + dtype.set_options(); + + 
(void)TensorMatcher({M, N}).with_dtype(dtype).with_device(device).verify(input); + (void)TensorMatcher({M, N}).with_dtype(dtype).with_device(device).verify(output); + (void)TensorMatcher({N}).with_dtype(dtype).with_device(device).verify(weight); + + const int rows = static_cast(M.unwrap()); + const int cols = static_cast(N.unwrap()); + const bool has_gate = (gate.numel() > 0); + + constexpr int BLOCK_SIZE = 256; + + if (dtype.is_type()) { + LaunchKernel(rows, BLOCK_SIZE, device.unwrap())( + rms_norm_gated_kernel, + static_cast(output.data_ptr()), + static_cast(input.data_ptr()), + static_cast(weight.data_ptr()), + has_gate ? static_cast(gate.data_ptr()) : nullptr, + rows, cols, static_cast(eps) + ); + } else if (dtype.is_type()) { + LaunchKernel(rows, BLOCK_SIZE, device.unwrap())( + rms_norm_gated_kernel<__nv_bfloat16, BLOCK_SIZE>, + static_cast<__nv_bfloat16*>(output.data_ptr()), + static_cast(input.data_ptr()), + static_cast(weight.data_ptr()), + has_gate ? static_cast(gate.data_ptr()) : nullptr, + rows, cols, static_cast(eps) + ); + } else { + LaunchKernel(rows, BLOCK_SIZE, device.unwrap())( + rms_norm_gated_kernel, + static_cast(output.data_ptr()), + static_cast(input.data_ptr()), + static_cast(weight.data_ptr()), + has_gate ? 
static_cast(gate.data_ptr()) : nullptr, + rows, cols, static_cast(eps) + ); + } +} + +} // namespace RMSNormGatedKernel diff --git a/mllm-kernel/mllm_kernel/cuda/jit/__init__.py b/mllm-kernel/mllm_kernel/cuda/jit/__init__.py index 202ff3b3..cc4ab667 100644 --- a/mllm-kernel/mllm_kernel/cuda/jit/__init__.py +++ b/mllm-kernel/mllm_kernel/cuda/jit/__init__.py @@ -1,4 +1,5 @@ from .add_constant import add_constant +from .gdn_decode import gdn_decode from .store_cache import can_use_store_cache, store_cache -__all__ = ["add_constant", "can_use_store_cache", "store_cache"] +__all__ = ["add_constant", "can_use_store_cache", "gdn_decode", "store_cache"] diff --git a/mllm-kernel/mllm_kernel/cuda/jit/gdn_decode.py b/mllm-kernel/mllm_kernel/cuda/jit/gdn_decode.py new file mode 100644 index 00000000..53aaeaab --- /dev/null +++ b/mllm-kernel/mllm_kernel/cuda/jit/gdn_decode.py @@ -0,0 +1,114 @@ +"""Fused GDN decode CUDA JIT kernel. + +Performs a single-token GDN (Gated Delta Net) recurrent update per request, +fusing gating + L2 normalization + delta rule + output computation into +one kernel. Works on SM80+ (Ampere, Jetson Orin, Hopper, ...). 
+ +Usage:: + + from mllm_kernel.cuda.jit.gdn_decode import gdn_decode + + output = gdn_decode(q, k, v, a, b, A_log, dt_bias, state_pool, cache_indices) +""" + +from __future__ import annotations + +import torch + +from mllm_kernel.jit_utils import cache_once, jit + + +@cache_once +def _make_gdn_decode_kernel(): + """JIT-compile the fused GDN decode CUDA kernel.""" + + @jit( + args=[], + device="cuda", + cuda_files=["gdn_decode.cuh"], + cpp_wrappers=[], + cuda_wrappers=[ + ("gdn_decode", "GDNDecodeKernel::run"), + ], + func_name="gdn_decode", + ) + def _kernel( + compiled_module, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + A_log: torch.Tensor, + dt_bias: torch.Tensor, + state_pool: torch.Tensor, + cache_indices: torch.Tensor, + output: torch.Tensor, + ) -> None: + compiled_module.gdn_decode( + q, k, v, a, b, A_log, dt_bias, state_pool, cache_indices, output + ) + + return _kernel + + +def gdn_decode( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + A_log: torch.Tensor, + dt_bias: torch.Tensor, + state_pool: torch.Tensor, + cache_indices: torch.Tensor, +) -> torch.Tensor: + """Fused GDN decode: gating + L2 norm + delta rule + output. + + Parameters + ---------- + q : torch.Tensor + Query tensor, shape ``(bs, num_k_heads, head_k_dim)``, bf16/fp16. + k : torch.Tensor + Key tensor, shape ``(bs, num_k_heads, head_k_dim)``, bf16/fp16. + v : torch.Tensor + Value tensor, shape ``(bs, num_v_heads, head_v_dim)``, bf16/fp16. + a : torch.Tensor + Decay gate input, shape ``(bs, num_v_heads)``, bf16/fp16. + b : torch.Tensor + Update gate input, shape ``(bs, num_v_heads)``, bf16/fp16. + A_log : torch.Tensor + Log-space decay parameter, shape ``(num_v_heads,)``, float32. + dt_bias : torch.Tensor + Bias for decay gate, shape ``(num_v_heads,)``, float32. + state_pool : torch.Tensor + Pooled recurrent state, shape ``(pool_size, num_v_heads, head_v_dim, head_k_dim)``, + float32. 
Modified in-place. + cache_indices : torch.Tensor + Pool indices per request, shape ``(bs,)``, int64. + + Returns + ------- + torch.Tensor + Output tensor, shape ``(bs, num_v_heads, head_v_dim)``, same dtype as v. + """ + bs = q.shape[0] + num_v_heads = v.shape[1] + head_v_dim = v.shape[2] + + output = torch.empty(bs, num_v_heads, head_v_dim, dtype=v.dtype, device=v.device) + + kernel = _make_gdn_decode_kernel() + kernel( + q.contiguous(), + k.contiguous(), + v.contiguous(), + a.contiguous(), + b.contiguous(), + A_log.contiguous(), + dt_bias.contiguous(), + state_pool, + cache_indices.to(torch.int64).contiguous(), + output, + ) + return output diff --git a/mllm-kernel/mllm_kernel/cuda/jit/rms_norm_gated.py b/mllm-kernel/mllm_kernel/cuda/jit/rms_norm_gated.py new file mode 100644 index 00000000..d7906a38 --- /dev/null +++ b/mllm-kernel/mllm_kernel/cuda/jit/rms_norm_gated.py @@ -0,0 +1,87 @@ +"""Fused RMSNorm + SiLU gating CUDA JIT kernel for Qwen3.5 GDN attention. + +Computes ``rmsnorm(x, weight, eps) * silu(z)`` in a single fused pass. 
+ +Usage:: + + from mllm_kernel.cuda.jit.rms_norm_gated import rms_norm_gated + + output = rms_norm_gated(x, weight, z=gate, eps=1e-6) +""" + +from __future__ import annotations + +import torch + +from mllm_kernel.jit_utils import cache_once, jit + + +@cache_once +def _make_rms_norm_gated_kernel(): + """JIT-compile the fused RMSNorm+gating CUDA kernel.""" + + @jit( + args=[], + device="cuda", + cuda_files=["rms_norm_gated.cuh"], + cpp_wrappers=[], + cuda_wrappers=[ + ("rms_norm_gated", "RMSNormGatedKernel::run"), + ], + func_name="rms_norm_gated", + ) + def _kernel( + compiled_module, + output: torch.Tensor, + input: torch.Tensor, + weight: torch.Tensor, + gate: torch.Tensor, + eps: float, + ) -> None: + compiled_module.rms_norm_gated(output, input, weight, gate, eps) + + return _kernel + + +def rms_norm_gated( + x: torch.Tensor, + weight: torch.Tensor, + z: torch.Tensor | None = None, + eps: float = 1e-6, +) -> torch.Tensor: + """Fused RMSNorm with optional SiLU gating. + + Parameters + ---------- + x : torch.Tensor + Input tensor, shape ``(M, N)`` or ``(..., N)``. + weight : torch.Tensor + Normalization weight, shape ``(N,)``. + z : torch.Tensor or None + Optional gating tensor, same shape as ``x``. + If provided: ``output = rmsnorm(x) * silu(z)`` + eps : float + Epsilon for numerical stability. + + Returns + ------- + torch.Tensor + Output with same shape and dtype as ``x``. 
+ """ + x_shape = x.shape + x_2d = x.reshape(-1, x.shape[-1]) + + if z is not None: + z_2d = z.reshape(-1, z.shape[-1]) + if z_2d.stride(-1) != 1: + z_2d = z_2d.contiguous() + else: + z_2d = x.new_empty(0) # empty tensor signals "no gate" to the kernel + + if x_2d.stride(-1) != 1: + x_2d = x_2d.contiguous() + + output = torch.empty_like(x_2d) + kernel = _make_rms_norm_gated_kernel() + kernel(output, x_2d, weight.contiguous(), z_2d, eps) + return output.reshape(x_shape) diff --git a/pymllm/configs/global_config.py b/pymllm/configs/global_config.py index 1761697b..711de3cd 100644 --- a/pymllm/configs/global_config.py +++ b/pymllm/configs/global_config.py @@ -127,6 +127,16 @@ def _converter_for_annotation(annotation: Any) -> Optional[Callable[[str], Any]] return None +def _choices_for_annotation(annotation: Any) -> Optional[list]: + """Extract allowed values from a ``Literal`` annotation, if applicable.""" + + inner, _ = _unwrap_optional(annotation) + origin = get_origin(inner) + if origin is Literal: + return list(get_args(inner)) + return None + + def _is_bool_annotation(annotation: Any) -> bool: """Return ``True`` if annotation represents a bool/Optional[bool] field.""" @@ -225,16 +235,27 @@ def make_args( # Skip non-scalar or runtime-only fields (e.g. arbitrary objects). continue - section_group.add_argument( - option, + choices = _choices_for_annotation(annotation) + kwargs: dict[str, Any] = dict( dest=dest, type=converter, default=argparse.SUPPRESS, - help=( + ) + if choices is not None: + kwargs["choices"] = choices + choices_str = ", ".join(str(c) for c in choices) + kwargs["help"] = ( + f"{section_name}.{dc_field.name} " + f"{{choices: {choices_str}}} " + f"(default: {_format_default_for_help(default_value)})." + ) + else: + kwargs["help"] = ( f"{section_name}.{dc_field.name} (default: " f"{_format_default_for_help(default_value)})." 
- ), - ) + ) + + section_group.add_argument(option, **kwargs) return parser diff --git a/pymllm/configs/server_config.py b/pymllm/configs/server_config.py index f6a2090f..8727f7c1 100644 --- a/pymllm/configs/server_config.py +++ b/pymllm/configs/server_config.py @@ -40,19 +40,13 @@ class ServerConfig: max_queued_requests: Optional[int] = None max_total_tokens: Optional[int] = None chunked_prefill_size: Optional[int] = None - max_prefill_tokens: int = None + max_prefill_tokens: Optional[int] = None schedule_policy: Literal["auto", "fcfs"] = "fcfs" schedule_conservativeness: float = 1.0 sleep_on_idle: bool = False stream_interval: int = 1 stream_output: bool = True - # --------------------------------------------------------------------- # - # Threads - # --------------------------------------------------------------------- # - enable_disk_io_async: bool = False - disk_io_async_thread_count: int = 1 - # --------------------------------------------------------------------- # # Device # --------------------------------------------------------------------- # @@ -62,23 +56,34 @@ class ServerConfig: # Backend / acceleration # --------------------------------------------------------------------- # attention_backend: Literal["auto", "flashinfer"] = "auto" + gdn_decode_backend: Literal["auto", "flashinfer", "mllm_kernel", "pytorch"] = "auto" sampling_backend: Optional[str] = None disable_cuda_graph: bool = False - enable_torch_compile: bool = True + enable_torch_compile: bool = False torch_compile_max_bs: int = 32 random_seed: Optional[int] = 42 + # --------------------------------------------------------------------- # + # Output parsers (reasoning / tool calls) + # --------------------------------------------------------------------- # + reasoning_parser: Optional[str] = None # e.g. "deepseek-r1", "qwen3" + tool_call_parser: Optional[str] = None # e.g. 
"qwen25", "llama3", "hermes" + # --------------------------------------------------------------------- # # Logging and observability # --------------------------------------------------------------------- # log_level: Literal["debug", "info", "warning", "error", "critical"] = "info" enable_metrics: bool = False show_time_cost: bool = False + # Log prefill/decode throughput stats every N decode batches (0 = disabled) + decode_log_interval: int = 40 # --------------------------------------------------------------------- # # Feature switches # --------------------------------------------------------------------- # enable_shared_queue: bool = False # Use shared memory queue for fast IPC + disable_radix_cache: bool = False # Disable radix-tree prefix caching + radix_cache_page_size: int = 1 # Number of tokens per KV-pool page in RadixCache # CUDA IPC transport for multimodal GPU tensors. # Requires enable_shared_queue=True to take effect. @@ -161,5 +166,7 @@ def _validate(self) -> None: raise ValueError("`max_running_requests` must be > 0 when set.") if self.max_queued_requests is not None and self.max_queued_requests < 0: raise ValueError("`max_queued_requests` must be >= 0 when set.") + if self.radix_cache_page_size < 1: + raise ValueError("`radix_cache_page_size` must be >= 1.") if self.schedule_conservativeness <= 0: raise ValueError("`schedule_conservativeness` must be > 0.") diff --git a/pymllm/engine/forward_batch.py b/pymllm/engine/forward_batch.py index ebb715ff..428da7b6 100644 --- a/pymllm/engine/forward_batch.py +++ b/pymllm/engine/forward_batch.py @@ -180,3 +180,12 @@ class ForwardBatch: # ---- attention backend (set by model runner) ---- attn_backend: Optional["AttentionBackend"] = None + + # ---- multimodal M-RoPE ---- + # Per-request position delta for M-RoPE decode steps. + # Set by the model during prefill; consumed during decode to offset positions. 
+ mrope_position_deltas: Optional[torch.Tensor] = None # [batch_size] int64 + + # ---- multimodal vision inputs (extend / prefill only) ---- + pixel_values: Optional[torch.Tensor] = None + image_grid_thw: Optional[torch.Tensor] = None diff --git a/pymllm/engine/launch.py b/pymllm/engine/launch.py index 2ba04e1c..e5214511 100644 --- a/pymllm/engine/launch.py +++ b/pymllm/engine/launch.py @@ -28,12 +28,102 @@ ) from pymllm.orchestrator.tokenizer_process import run_tokenizer_process from pymllm.orchestrator.scheduler_process import run_scheduler_process -from pymllm.orchestrator.model_runner_process import run_model_runner_process from pymllm.orchestrator.detokenizer_process import run_detokenizer_process -from pymllm.orchestrator.async_disk_io_process import run_async_disk_io_process logger = logging.getLogger(__name__) +# Standard HuggingFace config fields that indicate max output tokens, +# checked in priority order. +_MAX_NEW_TOKENS_FIELDS = ( + "max_new_tokens", + "max_tokens", + "max_completion_tokens", +) + + +def _normalize_eos_raw(raw) -> List[int]: + """Normalize a raw eos_token_id value (int, list, or None) to a list.""" + if raw is None: + return [] + if isinstance(raw, int): + return [raw] + if isinstance(raw, (list, tuple)): + return [x for x in raw if isinstance(x, int)] + return [] + + +def _get_eos_token_ids(hf_config, model_path=None) -> List[int]: + """Extract EOS token ID(s) from a HuggingFace model config. + + Searches in priority order: + 1. ``hf_config.eos_token_id`` (top-level, standard models) + 2. ``hf_config.text_config.eos_token_id`` (VL / multimodal models) + 3. ``generation_config.json`` (many models store EOS here) + 4. ``tokenizer_config.json`` via AutoTokenizer (last resort) + """ + if hf_config is None: + return [] + + # 1. Top-level config + ids = _normalize_eos_raw(getattr(hf_config, "eos_token_id", None)) + if ids: + return ids + + # 2. 
Nested text_config (VL / multimodal models like Qwen3-VL) + text_config = getattr(hf_config, "text_config", None) + if text_config is not None: + ids = _normalize_eos_raw(getattr(text_config, "eos_token_id", None)) + if ids: + return ids + + # 3. generation_config.json (lightweight, just reads a JSON file) + if model_path is not None: + try: + from transformers import GenerationConfig + + gen_cfg = GenerationConfig.from_pretrained(str(model_path)) + ids = _normalize_eos_raw(getattr(gen_cfg, "eos_token_id", None)) + if ids: + logger.info("EOS token IDs from generation_config.json: %s", ids) + return ids + except Exception: + pass + + # 4. Tokenizer (last resort) + if model_path is not None: + try: + from transformers import AutoTokenizer + + tok = AutoTokenizer.from_pretrained(str(model_path), trust_remote_code=True) + if tok.eos_token_id is not None: + ids = [tok.eos_token_id] + logger.info("EOS token ID from tokenizer: %s", ids) + return ids + except Exception: + pass + + return [] + + +def _get_model_default_max_new_tokens(hf_config) -> Optional[int]: + """Extract max output token limit from a HuggingFace model config. + + Checks standard fields in priority order. Returns ``None`` when the + config does not specify any recognised output-length field. 
+ """ + if hf_config is None: + return None + for field_name in _MAX_NEW_TOKENS_FIELDS: + value = getattr(hf_config, field_name, None) + if value is not None and isinstance(value, int) and value > 0: + logger.info( + "Using model config %s=%d as default max_new_tokens", + field_name, + value, + ) + return value + return None + class Engine: def __init__(self): @@ -59,20 +149,12 @@ def _launch_processes(self) -> None: addr_tokenizer_to_scheduler: str = make_ipc_address( "tokenizer_to_scheduler", uid ) - addr_scheduler_to_model_runner: str = make_ipc_address( - "scheduler_to_model_runner", uid - ) - addr_model_runner_to_scheduler: str = make_ipc_address( - "model_runner_to_scheduler", uid - ) addr_scheduler_to_detokenizer: str = make_ipc_address( "scheduler_to_detokenizer", uid ) addr_detokenizer_to_request_response: str = make_ipc_address( "detokenizer_to_request_response", uid ) - addr_scheduler_to_disk_io: str = make_ipc_address("scheduler_to_disk_io", uid) - # Record all subprocesses procs_and_readers: List[tuple] = [] @@ -114,6 +196,7 @@ def _launch_processes(self) -> None: "tensor_transport_mode": transport_mode, "cuda_ipc_pool_size_mb": cfg.server.cuda_ipc_pool_size_mb, "cuda_ipc_recycle_interval": cfg.server.cuda_ipc_recycle_interval, + "log_level": cfg.server.log_level, } # Tokenizer @@ -131,39 +214,44 @@ def _launch_processes(self) -> None: ) procs_and_readers.append((tokenizer_proc, tokenizer_reader, "tokenizer")) - # Scheduler + # Determine default max_new_tokens from model config (if available) + model_max_new_tokens = _get_model_default_max_new_tokens( + cfg.model.hf_config + ) + scheduler_kwargs = {} + if model_max_new_tokens is not None: + scheduler_kwargs["default_max_new_tokens"] = model_max_new_tokens + + # Extract EOS token ID(s) from model config + eos_token_ids = _get_eos_token_ids(cfg.model.hf_config, model_path=cfg.server.model_path) + if eos_token_ids: + scheduler_kwargs["eos_token_ids"] = eos_token_ids + logger.info("EOS token IDs for 
scheduler: %s", eos_token_ids) + + # Model runner config — passed to the scheduler process which now + # owns the model runner in-process (sglang-style architecture). + scheduler_kwargs["server_config"] = cfg.server + scheduler_kwargs["model_config"] = cfg.model + scheduler_kwargs["gpu_id"] = cfg.server.base_gpu_id + + # Scheduler (+ in-process model runner) scheduler_reader, scheduler_writer = mp.Pipe(duplex=False) scheduler_proc = mp.Process( target=run_scheduler_process, args=( addr_tokenizer_to_scheduler, - addr_scheduler_to_model_runner, - addr_model_runner_to_scheduler, addr_scheduler_to_detokenizer, scheduler_writer, shared_queue, # Pass shared queue enable_shared_queue, # Pass flag transport_mode, # Pass tensor transport mode + cfg.server.log_level, # Pass log level ), + kwargs=scheduler_kwargs, daemon=True, ) procs_and_readers.append((scheduler_proc, scheduler_reader, "scheduler")) - # Model Runner - model_runner_reader, model_runner_writer = mp.Pipe(duplex=False) - model_runner_proc = mp.Process( - target=run_model_runner_process, - args=( - addr_scheduler_to_model_runner, - addr_model_runner_to_scheduler, - model_runner_writer, - ), - daemon=True, - ) - procs_and_readers.append( - (model_runner_proc, model_runner_reader, "model_runner") - ) - # Detokenizer detokenizer_reader, detokenizer_writer = mp.Pipe(duplex=False) detokenizer_proc = mp.Process( @@ -172,21 +260,12 @@ def _launch_processes(self) -> None: addr_scheduler_to_detokenizer, addr_detokenizer_to_request_response, detokenizer_writer, + tokenizer_cfg, ), daemon=True, ) procs_and_readers.append((detokenizer_proc, detokenizer_reader, "detokenizer")) - # Async Disk I/O - if get_global_config().server.enable_disk_io_async: - disk_io_reader, disk_io_writer = mp.Pipe(duplex=False) - disk_io_proc = mp.Process( - target=run_async_disk_io_process, - args=(addr_scheduler_to_disk_io, disk_io_writer), - daemon=True, - ) - procs_and_readers.append((disk_io_proc, disk_io_reader, "async_disk_io")) - # Start 
all subprocesses for proc, _, name in procs_and_readers: proc.start() @@ -203,20 +282,15 @@ def _launch_processes(self) -> None: raise RuntimeError(f"{name} process failed to initialise: {msg}") logger.info("%s process ready", name) - # RR Process is current main process + # RR Process is current main process — only bind ZMQ sockets here. + # Background tasks are started lazily by listen() on the first + # add_request(), so they always run on the correct event loop. self._rr_process = RequestResponseProcess( send_to_tokenizer_addr=addr_request_response_to_tokenizer, recv_from_detokenizer_addr=addr_detokenizer_to_request_response, ) - - try: - self._loop = asyncio.get_running_loop() - except RuntimeError: - self._loop = asyncio.new_event_loop() - asyncio.set_event_loop(self._loop) - - self._rr_process.start(self._loop) - logger.info("RequestResponseProcess started in main process") + self._rr_process.start() + logger.info("RequestResponseProcess sockets bound") # Print colorful gradient ASCII art banner if HAS_BANNER_LIBS: @@ -296,7 +370,12 @@ async def _run() -> Union[Dict[str, Any], List[Dict[str, Any]]]: ) return list(outputs) - return self._loop.run_until_complete(_run()) + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + return loop.run_until_complete(_run()) async def generate_async( self, @@ -354,10 +433,15 @@ async def generate_async( else: yield await self._wait_for_final_result(single_rid, state) finally: - self._rr_process.remove_state(single_rid) + if not state.finished: + logger.info("Aborting request %s (client disconnected)", single_rid) + await self._rr_process.abort_request(single_rid) + else: + self._rr_process.remove_state(single_rid) else: rids_list: List[str] = rid if isinstance(rid, list) else [rid] # type: ignore[assignment] states: List[ReqState] = result # type: ignore[assignment] + _bg_tasks: List[asyncio.Task] = [] try: if stream: # Merge streams from all 
sub-requests using an asyncio queue. @@ -368,18 +452,18 @@ async def _forward(r: str, s: ReqState) -> None: await queue.put(chunk) await queue.put(None) # sentinel - tasks = [ + _bg_tasks = [ asyncio.create_task(_forward(r, s)) for r, s in zip(rids_list, states) ] done_count = 0 - while done_count < len(tasks): + while done_count < len(_bg_tasks): item = await queue.get() if item is None: done_count += 1 else: yield item - await asyncio.gather(*tasks) + await asyncio.gather(*_bg_tasks) else: for coro in asyncio.as_completed( [ @@ -389,8 +473,14 @@ async def _forward(r: str, s: ReqState) -> None: ): yield await coro finally: - for r in rids_list: - self._rr_process.remove_state(r) + for t in _bg_tasks: + t.cancel() + for r, s in zip(rids_list, states): + if not s.finished: + logger.info("Aborting request %s (client disconnected)", r) + await self._rr_process.abort_request(r) + else: + self._rr_process.remove_state(r) @staticmethod async def _wait_for_final_result(rid: str, state: ReqState) -> Dict[str, Any]: @@ -443,7 +533,11 @@ def shutdown(self) -> None: """Terminate all subprocesses.""" if self._rr_process is not None: try: - self._loop.run_until_complete(self._rr_process.shutdown()) + loop = asyncio.get_event_loop() + if loop.is_running(): + loop.create_task(self._rr_process.shutdown()) + else: + loop.run_until_complete(self._rr_process.shutdown()) except Exception: pass for proc in self._subprocesses: diff --git a/pymllm/executor/__init__.py b/pymllm/executor/__init__.py index e69de29b..b513b870 100644 --- a/pymllm/executor/__init__.py +++ b/pymllm/executor/__init__.py @@ -0,0 +1,10 @@ +"""Executor module: model loading, forward pass, and sampling.""" + +from pymllm.executor.cuda_graph_runner import CudaGraphRunner +from pymllm.executor.model_runner import LogitsProcessorOutput, ModelRunner + +__all__ = [ + "CudaGraphRunner", + "LogitsProcessorOutput", + "ModelRunner", +] diff --git a/pymllm/executor/cuda_graph_runner.py b/pymllm/executor/cuda_graph_runner.py 
index e69de29b..fe4fb0e9 100644 --- a/pymllm/executor/cuda_graph_runner.py +++ b/pymllm/executor/cuda_graph_runner.py @@ -0,0 +1,590 @@ +"""CUDA-graph accelerated forward pass for decode steps. + +Captures CUDA graphs for a set of discrete batch sizes so that the decode +forward pass can be replayed without CPU-side kernel-launch overhead. + +Simplified from sglang's ``CudaGraphRunner`` for pymllm's single-GPU +architecture. Handles: + +* Pre-allocated input buffers (avoids per-step allocations) +* CUDA-graph capture for each batch size +* Optional ``torch.compile`` integration +* Graph replay with padding to the nearest captured batch size + +Typical lifecycle:: + + runner = CudaGraphRunner(model_runner) # captures all batch sizes + + # --- inside the inference loop --- + if runner.can_run(forward_batch): + logits_output = runner.replay(forward_batch) + else: + logits_output = model_runner.forward(forward_batch) + +Integration with :class:`~pymllm.executor.model_runner.ModelRunner` +------------------------------------------------------------------- +The ``ModelRunner`` owns the ``CudaGraphRunner`` and delegates decode +batches to it when the batch size is within the captured range. The +``CudaGraphRunner`` calls ``attn_backend.init_forward_metadata_*_cuda_graph`` +directly (bypassing the normal ``init_forward_metadata`` path) so that +FlashInfer's per-batch planning is recorded inside the graph. 
+""" + +from __future__ import annotations + +import bisect +import gc +import logging +import time +from contextlib import contextmanager +from dataclasses import dataclass +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union + +import torch + +from pymllm.engine.forward_batch import ForwardBatch, ForwardMode +from pymllm.executor.model_runner import LogitsProcessorOutput + +if TYPE_CHECKING: + from pymllm.executor.model_runner import ModelRunner + from pymllm.layers.attention.attention_backend import AttentionBackend + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Global CUDA-graph memory pool (shared across all CudaGraphRunner instances) +# --------------------------------------------------------------------------- + +_global_graph_memory_pool: Optional[tuple] = None + + +def get_global_graph_memory_pool() -> Optional[tuple]: + """Return the shared CUDA graph memory pool handle.""" + return _global_graph_memory_pool + + +def set_global_graph_memory_pool(pool: tuple) -> None: + """Set the shared CUDA graph memory pool handle.""" + global _global_graph_memory_pool + _global_graph_memory_pool = pool + + +# --------------------------------------------------------------------------- +# Context managers +# --------------------------------------------------------------------------- + +# Flag indicating whether we are currently capturing a CUDA graph. +_is_capture_mode: bool = False + + +def is_capture_mode() -> bool: + """Return ``True`` if a CUDA-graph capture is in progress.""" + return _is_capture_mode + + +@contextmanager +def model_capture_mode(): + """Context manager that sets the global capture-mode flag.""" + global _is_capture_mode + _is_capture_mode = True + try: + yield + finally: + _is_capture_mode = False + + +@contextmanager +def freeze_gc(): + """Freeze the garbage collector during CUDA-graph capture. 
+ + GC activity during capture can interfere with the recorded stream + ordering. This context manager collects garbage before capture, + freezes all surviving objects, and unfreezes + re-collects afterwards. + """ + gc.collect() + gc.freeze() + try: + yield + finally: + gc.unfreeze() + gc.collect() + + +# --------------------------------------------------------------------------- +# Pre-allocated input buffers +# --------------------------------------------------------------------------- + + +@dataclass +class _InputBuffers: + """Pre-allocated GPU tensors used as CUDA-graph inputs. + + During graph capture these buffers are used as-is. During replay the + real batch data is copied into the first ``batch_size`` rows while the + remaining padding rows retain their fill values. + """ + + input_ids: torch.Tensor # [max_bs] int64 + req_pool_indices: torch.Tensor # [max_bs] int32 + seq_lens: torch.Tensor # [max_bs] int32 + seq_lens_cpu: torch.Tensor # [max_bs] int32 (CPU) + out_cache_loc: torch.Tensor # [max_bs] int64 + positions: torch.Tensor # [max_bs] int64 + mrope_position_deltas: torch.Tensor # [max_bs] int64 + + @classmethod + def create( + cls, + *, + device: torch.device, + max_bs: int, + seq_len_fill_value: int, + ) -> "_InputBuffers": + """Allocate all buffers for the given maximum batch size.""" + with torch.device(device): + input_ids = torch.zeros((max_bs,), dtype=torch.int64) + req_pool_indices = torch.zeros((max_bs,), dtype=torch.int32) + seq_lens = torch.full((max_bs,), seq_len_fill_value, dtype=torch.int32) + out_cache_loc = torch.zeros((max_bs,), dtype=torch.int64) + positions = torch.zeros((max_bs,), dtype=torch.int64) + mrope_position_deltas = torch.zeros((max_bs,), dtype=torch.int64) + + # seq_lens_cpu must be a real CPU tensor. 
+ seq_lens_cpu = torch.full( + (max_bs,), + seq_len_fill_value, + dtype=torch.int32, + device="cpu", + ) + + return cls( + input_ids=input_ids, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + seq_lens_cpu=seq_lens_cpu, + out_cache_loc=out_cache_loc, + positions=positions, + mrope_position_deltas=mrope_position_deltas, + ) + + def populate( + self, + forward_batch: ForwardBatch, + padded_bs: int, + seq_len_fill_value: int, + ) -> None: + """Copy real batch data into the pre-allocated buffers. + + Any padding slots (``[real_bs : padded_bs]``) are filled with safe + defaults so that the captured graph does not access invalid memory. + """ + real_bs = forward_batch.batch_size + + # Reset padding slots when the padded size exceeds the real size. + if padded_bs != real_bs: + self.seq_lens.fill_(seq_len_fill_value) + self.out_cache_loc.zero_() + self.mrope_position_deltas.zero_() + + self.input_ids[:real_bs].copy_(forward_batch.input_ids) + self.req_pool_indices[:real_bs].copy_(forward_batch.req_pool_indices) + self.seq_lens[:real_bs].copy_(forward_batch.seq_lens) + self.out_cache_loc[:real_bs].copy_(forward_batch.out_cache_loc) + self.positions[:real_bs].copy_(forward_batch.positions) + + # Copy M-RoPE position deltas (used by Qwen3-VL for multimodal). + if forward_batch.mrope_position_deltas is not None: + self.mrope_position_deltas[:real_bs].copy_( + forward_batch.mrope_position_deltas + ) + else: + self.mrope_position_deltas[:real_bs].zero_() + + if forward_batch.seq_lens_cpu is not None: + if padded_bs != real_bs: + self.seq_lens_cpu.fill_(seq_len_fill_value) + self.seq_lens_cpu[:real_bs].copy_(forward_batch.seq_lens_cpu) + + +# --------------------------------------------------------------------------- +# Batch-size schedule +# --------------------------------------------------------------------------- + + +def _default_capture_batch_sizes(max_bs: int) -> List[int]: + """Return a list of batch sizes to capture. 
+ + Uses the same schedule as sglang (non-speculative):: + + [1, 2, 4, 8, 12, 16, 24, 32, 40, …, 256, 272, 288, …, 512, 544, …] + + Capped at *max_bs*. + """ + bs_list = ( + [1, 2, 4, 8, 12] + + list(range(16, 257, 8)) + + list(range(272, 512, 16)) + + list(range(512, max_bs + 1, 32)) + ) + bs_list = sorted(set(bs for bs in bs_list if bs <= max_bs)) + if not bs_list: + bs_list = [1] + return bs_list + + +# --------------------------------------------------------------------------- +# CudaGraphRunner +# --------------------------------------------------------------------------- + + +class CudaGraphRunner: + """Captures and replays CUDA graphs for decode-step forward passes. + + This class is the pymllm equivalent of sglang's ``CudaGraphRunner``, + stripped of distributed, speculative-decoding, LoRA, mamba, TBO, and + piecewise-graph complexities. + + Parameters + ---------- + model_runner + The owning :class:`~pymllm.executor.model_runner.ModelRunner`. + Must have been fully initialised before the ``CudaGraphRunner`` + is constructed. 
+ """ + + def __init__(self, model_runner: "ModelRunner"): + self.model_runner = model_runner + self.device = model_runner.device + + self.graphs: Dict[int, torch.cuda.CUDAGraph] = {} + self.output_buffers: Dict[int, LogitsProcessorOutput] = {} + + self.enable_torch_compile: bool = ( + model_runner.server_config.enable_torch_compile + ) + self.torch_compile_max_bs: int = model_runner.server_config.torch_compile_max_bs + + # ----------------------------------------------------------- + # Batch-size schedule + # ----------------------------------------------------------- + max_bs = model_runner.max_running_requests + self.capture_bs: List[int] = _default_capture_batch_sizes(max_bs) + self.compile_bs: List[int] = ( + [bs for bs in self.capture_bs if bs <= self.torch_compile_max_bs] + if self.enable_torch_compile + else [] + ) + self.max_bs: int = max(self.capture_bs) + + logger.info("CUDA graph capture batch sizes: %s", self.capture_bs) + + # ----------------------------------------------------------- + # Attention-backend CUDA-graph state + # ----------------------------------------------------------- + self.model_runner.attn_backend.init_cuda_graph_state(self.max_bs, self.max_bs) + + # Fill value for padded seq_lens so attention kernels don't div-by-0. 
+ self.seq_len_fill_value: int = ( + self.model_runner.attn_backend.get_cuda_graph_seq_len_fill_value() + ) + + # ----------------------------------------------------------- + # Pre-allocated input buffers + # ----------------------------------------------------------- + self.buffers: _InputBuffers = _InputBuffers.create( + device=torch.device(self.device), + max_bs=self.max_bs, + seq_len_fill_value=self.seq_len_fill_value, + ) + + # ----------------------------------------------------------- + # Optional torch.compile config + # ----------------------------------------------------------- + if self.enable_torch_compile: + _set_torch_compile_config() + + # ----------------------------------------------------------- + # Capture all batch sizes + # ----------------------------------------------------------- + try: + with model_capture_mode(): + self.capture() + except RuntimeError as exc: + raise RuntimeError( + f"CUDA graph capture failed: {exc}\n" + "Possible fixes:\n" + " 1. Reduce --server.mem_fraction_static (e.g. 0.7)\n" + " 2. Reduce --server.max_running_requests\n" + " 3. Disable CUDA graph with --server.disable_cuda_graph\n" + ) from exc + + # ------------------------------------------------------------------ + # Capability check + # ------------------------------------------------------------------ + + def can_run(self, forward_batch: ForwardBatch) -> bool: + """Return ``True`` if the batch can be run via CUDA graph replay. + + The batch must be a decode (or idle) batch whose size does not + exceed the largest captured batch size. + """ + return ( + forward_batch.forward_mode.is_decode_or_idle() + and forward_batch.batch_size <= self.max_bs + ) + + # ------------------------------------------------------------------ + # Capture + # ------------------------------------------------------------------ + + def capture(self) -> None: + """Capture CUDA graphs for every batch size in ``capture_bs``. 
+ + Iterates in reverse order (largest first) so that the GPU memory + pool allocated for the largest graph is reused by smaller ones. + """ + tic = time.perf_counter() + before_mem = _get_avail_mem(self.device) + logger.info("CUDA graph capture begin. avail mem=%.2f GB", before_mem) + + with freeze_gc(): + stream = torch.cuda.Stream() + with torch.cuda.stream(stream): + for bs in reversed(self.capture_bs): + forward_fn = self._get_forward_fn(bs) + graph, output = self._capture_one_batch_size(bs, forward_fn, stream) + self.graphs[bs] = graph + self.output_buffers[bs] = output + + after_mem = _get_avail_mem(self.device) + logger.info( + "CUDA graph capture end. elapsed=%.2f s, mem usage=%.2f GB, " + "avail mem=%.2f GB", + time.perf_counter() - tic, + before_mem - after_mem, + after_mem, + ) + + def _get_forward_fn(self, bs: int) -> Callable: + """Return the forward callable for the given batch size. + + When ``torch.compile`` is enabled and *bs* is within the compile + threshold, the model's forward method is wrapped with + ``torch.compile``. + """ + model_forward = self.model_runner.model.forward + if self.enable_torch_compile and bs in self.compile_bs: + return torch.compile( + torch.no_grad()(model_forward), + mode="max-autotune-no-cudagraphs", + ) + return model_forward + + def _capture_one_batch_size( + self, + bs: int, + forward: Callable, + stream: torch.cuda.Stream, + ) -> tuple: + """Capture a single CUDA graph for batch size *bs*. + + Steps: + 1. Build a ``ForwardBatch`` from the pre-allocated buffers. + 2. Tell the attention backend to plan for CUDA-graph capture. + 3. Run the forward pass twice for warmup. + 4. Capture the third run into a ``CUDAGraph``. + + Returns ``(graph, output_buffers)``. + """ + buffers = self.buffers + + # Slice pre-allocated buffers to the capture size. 
+ input_ids = buffers.input_ids[:bs] + req_pool_indices = buffers.req_pool_indices[:bs] + seq_lens = buffers.seq_lens[:bs] + seq_lens_cpu = buffers.seq_lens_cpu[:bs] + out_cache_loc = buffers.out_cache_loc[:bs] + positions = buffers.positions[:bs] + mrope_position_deltas = buffers.mrope_position_deltas[:bs] + + # Build ForwardBatch (DECODE mode). + # mrope_position_deltas is set to the static buffer (initially zeros) + # so that the graph captures the ``positions + deltas`` path. During + # replay the buffer is updated with real delta values. + forward_batch = ForwardBatch( + forward_mode=ForwardMode.DECODE, + batch_size=bs, + input_ids=input_ids, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + out_cache_loc=out_cache_loc, + seq_lens_sum=int(seq_lens.sum().item()), + seq_lens_cpu=seq_lens_cpu, + positions=positions, + return_logprob=False, + req_to_token_pool=self.model_runner.req_to_token_pool, + token_to_kv_pool=self.model_runner.token_to_kv_pool, + attn_backend=self.model_runner.attn_backend, + mrope_position_deltas=mrope_position_deltas, + ) + + # Tell the attention backend to set up CUDA-graph-aware metadata. + self.model_runner.attn_backend.init_forward_metadata_capture_cuda_graph( + bs=bs, + num_tokens=bs, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + forward_mode=ForwardMode.DECODE, + ) + + # The single forward-pass function to be captured. + def run_once(): + return forward( + input_ids, + forward_batch.positions, + forward_batch, + ) + + # Warmup (2 eager runs to stabilise cudnn / autotuner / etc.). 
+ for _ in range(2): + torch.cuda.synchronize() + run_once() + + # ----- Capture ----- + global _global_graph_memory_pool + if _global_graph_memory_pool is None: + _global_graph_memory_pool = torch.cuda.graph_pool_handle() + + graph = torch.cuda.CUDAGraph() + with torch.cuda.graph( + graph, + pool=_global_graph_memory_pool, + stream=stream, + ): + output = run_once() + + return graph, output + + # ------------------------------------------------------------------ + # Replay + # ------------------------------------------------------------------ + + def replay( + self, + forward_batch: ForwardBatch, + ) -> LogitsProcessorOutput: + """Replay a captured CUDA graph for the given decode batch. + + The batch is padded to the nearest captured size, inputs are copied + into the pre-allocated buffers, the graph is replayed, and the + output is sliced back to the real batch size. + + Parameters + ---------- + forward_batch + The decode batch from the scheduler. + + Returns + ------- + LogitsProcessorOutput + The logits for the real (un-padded) sequences. + """ + real_bs = forward_batch.batch_size + + # Find the smallest captured bs >= real_bs. + idx = bisect.bisect_left(self.capture_bs, real_bs) + padded_bs = self.capture_bs[idx] + + # Copy real data into the static buffers. + self.buffers.populate( + forward_batch, + padded_bs=padded_bs, + seq_len_fill_value=self.seq_len_fill_value, + ) + + # Update the attention backend for replay. + seq_lens_sum = ( + forward_batch.seq_lens_sum + (padded_bs - real_bs) * self.seq_len_fill_value + ) + self.model_runner.attn_backend.init_forward_metadata_replay_cuda_graph( + bs=padded_bs, + req_pool_indices=self.buffers.req_pool_indices[:padded_bs], + seq_lens=self.buffers.seq_lens[:padded_bs], + seq_lens_sum=seq_lens_sum, + forward_mode=ForwardMode.DECODE, + seq_lens_cpu=self.buffers.seq_lens_cpu[:padded_bs], + ) + + # Replay the graph. + self.graphs[padded_bs].replay() + + # Retrieve output and slice to real batch size. 
+ output = self.output_buffers[padded_bs] + + if isinstance(output, LogitsProcessorOutput): + return LogitsProcessorOutput( + next_token_logits=output.next_token_logits[:real_bs], + hidden_states=( + output.hidden_states[:real_bs] + if output.hidden_states is not None + else None + ), + ) + elif isinstance(output, torch.Tensor): + # Raw tensor output: assume [padded_bs, vocab_size]. + return LogitsProcessorOutput( + next_token_logits=output[:real_bs], + ) + else: + # HuggingFace-style output with .logits attribute. + if hasattr(output, "logits"): + logits = output.logits + if logits.dim() == 3: + return LogitsProcessorOutput( + next_token_logits=logits[:real_bs, -1, :], + ) + return LogitsProcessorOutput( + next_token_logits=logits[:real_bs], + ) + raise TypeError(f"Unexpected CUDA graph output type: {type(output)}") + + # ------------------------------------------------------------------ + # Cleanup + # ------------------------------------------------------------------ + + def shutdown(self) -> None: + """Release all captured CUDA graphs and associated buffers.""" + for graph in self.graphs.values(): + del graph + self.graphs.clear() + self.output_buffers.clear() + logger.info("CudaGraphRunner shutdown complete.") + + +# --------------------------------------------------------------------------- +# Utility helpers +# --------------------------------------------------------------------------- + + +def _get_avail_mem(device: str) -> float: + """Return available GPU memory in GB.""" + if device != "cuda" or not torch.cuda.is_available(): + return 0.0 + free, _ = torch.cuda.mem_get_info() + return free / (1 << 30) + + +def _set_torch_compile_config() -> None: + """Set dynamo / inductor configs for optimal CUDA-graph + compile.""" + try: + import torch._dynamo.config + import torch._inductor.config + + torch._inductor.config.coordinate_descent_tuning = True + torch._inductor.config.triton.unique_kernel_names = True + torch._inductor.config.fx_graph_cache = True + 
torch._dynamo.config.accumulated_cache_size_limit = 1024 + if hasattr(torch._dynamo.config, "cache_size_limit"): + torch._dynamo.config.cache_size_limit = 1024 + except ImportError: + logger.warning("torch._dynamo / torch._inductor not available.") diff --git a/pymllm/executor/model_runner.py b/pymllm/executor/model_runner.py index e69de29b..6d6f33fe 100644 --- a/pymllm/executor/model_runner.py +++ b/pymllm/executor/model_runner.py @@ -0,0 +1,1198 @@ +"""ModelRunner runs the forward passes of the models. + +Simplified from sglang's ``ModelRunner`` for pymllm's single-GPU inference +architecture. Handles: + +* Model loading (HuggingFace checkpoint via ``transformers``) +* KV-cache memory pool initialisation +* Attention backend setup (FlashInfer) +* Forward pass dispatch (extend / decode / idle) +* Token sampling from logits + +Typical lifecycle:: + + runner = ModelRunner(server_config, model_config) + runner.initialize() + + # --- inside the inference loop --- + forward_batch = runner.prepare_forward_batch_decode(...) 
+ logits_output = runner.forward(forward_batch) + next_token_ids = runner.sample(logits_output, forward_batch) + +Typical data flow +----------------- + SchedulerProcess builds a batch dict + ↓ + ModelRunnerProcess calls ModelRunner.forward(forward_batch) + ↓ + attn_backend.init_forward_metadata(forward_batch) + ↓ + model.forward(input_ids, positions, forward_batch) + ↓ + ModelRunner.sample(logits_output, forward_batch) + ↓ + next_token_ids returned to scheduler +""" + +from __future__ import annotations + +import gc +import logging +import time +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union + +import torch +from torch import nn + +from pymllm.configs import get_global_config +from pymllm.engine.forward_batch import ForwardBatch, ForwardMode +from pymllm.mem_cache.memory_pool import ( + GDNPool, + KVPool, + ReqToTokenPool, + TokenToKVPoolAllocator, + make_full_attention_net_mem_pool, + make_req_to_token_pool, +) + +if TYPE_CHECKING: + from pymllm.configs.model_config import ModelConfig + from pymllm.configs.server_config import ServerConfig + from pymllm.executor.cuda_graph_runner import CudaGraphRunner + from pymllm.layers.attention.attention_backend import AttentionBackend + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Utility: GPU memory query +# --------------------------------------------------------------------------- + + +def get_available_gpu_memory(device: str = "cuda", gpu_id: int = 0) -> float: + """Return available GPU memory in GB.""" + if device != "cuda" or not torch.cuda.is_available(): + return 0.0 + torch.cuda.set_device(gpu_id) + free, _ = torch.cuda.mem_get_info(gpu_id) + return free / (1 << 30) + + +def get_total_gpu_memory(device: str = "cuda", gpu_id: int = 0) -> float: + """Return total GPU memory in GB.""" + if device != "cuda" or not torch.cuda.is_available(): + return 0.0 + torch.cuda.set_device(gpu_id) + 
_, total = torch.cuda.mem_get_info(gpu_id) + return total / (1 << 30) + + +# --------------------------------------------------------------------------- +# LogitsProcessorOutput +# --------------------------------------------------------------------------- + + +@dataclass +class LogitsProcessorOutput: + """Container for output logits produced by the model's forward pass. + + Attributes + ---------- + next_token_logits + Raw logits for the last token of each sequence in the batch, + shape ``[batch_size, vocab_size]``. + hidden_states + Optional hidden states from the model (e.g. for speculative decoding + or auxiliary loss computation). + """ + + next_token_logits: torch.Tensor # [batch_size, vocab_size] + hidden_states: Optional[torch.Tensor] = None + + +# --------------------------------------------------------------------------- +# ModelRunner +# --------------------------------------------------------------------------- + + +class ModelRunner: + """Runs the forward passes of the models. + + This is the core execution component that owns the model, memory pools, + and attention backend. It is used by + :class:`~pymllm.orchestrator.model_runner_process.ModelRunnerProcess` to + execute batches dispatched by the scheduler. + + Parameters + ---------- + server_config + Server runtime configuration. Falls back to the global singleton + when ``None``. + model_config + Model configuration (wraps a HuggingFace ``PretrainedConfig``). + Falls back to the global singleton when ``None``. + gpu_id + GPU device index to use. 
+ """ + + def __init__( + self, + server_config: Optional["ServerConfig"] = None, + model_config: Optional["ModelConfig"] = None, + gpu_id: int = 0, + ): + cfg = get_global_config() + self.server_config = server_config or cfg.server + self.model_config = model_config or cfg.model + + self.gpu_id = gpu_id + self.device: str = "cuda" if torch.cuda.is_available() else "cpu" + self.dtype: torch.dtype = self._resolve_dtype() + + # Set by initialize() + self.model: Optional[nn.Module] = None + self.req_to_token_pool: Optional[ReqToTokenPool] = None + self.token_to_kv_pool: Optional[KVPool] = None + self.token_to_kv_pool_allocator: Optional[TokenToKVPoolAllocator] = None + self.gdn_pool: Optional[GDNPool] = None + self.attn_backend: Optional["AttentionBackend"] = None + self.graph_runner: Optional["CudaGraphRunner"] = None + + # Memory configuration + self.max_total_num_tokens: int = 0 + self.max_running_requests: int = 0 + + # Model metadata (populated after loading) + self.num_hidden_layers: int = 0 + self.num_attention_heads: int = 0 + self.num_kv_heads: int = 0 + self.head_dim: int = 0 + self.hidden_size: int = 0 + self.vocab_size: int = 0 + self.context_len: int = 0 + + # KV cache dtype -- same as model dtype by default; may differ for + # quantised KV caches in the future. + self.kv_cache_dtype: torch.dtype = self.dtype + + # Forward pass counter (monotonically increasing). + self.forward_pass_id: int = 0 + + # ------------------------------------------------------------------ + # Initialisation + # ------------------------------------------------------------------ + + def initialize(self) -> None: + """Full initialisation: set device, load model, init memory + backend. + + Call this once before any forward pass. 
+ """ + tic = time.perf_counter() + logger.info("ModelRunner initialisation begin.") + + # Set device + if self.device == "cuda": + torch.cuda.set_device(self.gpu_id) + + # Set default dtype + torch.set_default_dtype(self.dtype) + + # Load the model + self.load_model() + + # Extract model metadata from hf_config + self._extract_model_metadata() + + # Resolve KV-cache dtype + self._configure_kv_cache_dtype() + + # Initialise memory pools + self.init_memory_pool() + + # Initialise attention backend + self.init_attention_backend() + + # Warm up cuBLAS + if self.device == "cuda": + self._init_cublas() + + # Capture CUDA graphs (must be after model + pools + backend) + self.init_cuda_graphs() + + elapsed = time.perf_counter() - tic + logger.info( + "ModelRunner initialisation complete. elapsed=%.2f s, " + "device=%s, dtype=%s, kv_dtype=%s, max_tokens=%d, max_reqs=%d", + elapsed, + self.device, + self.dtype, + self.kv_cache_dtype, + self.max_total_num_tokens, + self.max_running_requests, + ) + + # ------------------------------------------------------------------ + # Dtype resolution + # ------------------------------------------------------------------ + + def _resolve_dtype(self) -> torch.dtype: + """Resolve the model dtype from configuration.""" + dtype_str = self.server_config.dtype + if dtype_str == "auto": + if torch.cuda.is_available(): + if torch.cuda.get_device_capability()[0] >= 8: + return torch.bfloat16 + return torch.float16 + return torch.float32 + dtype_map = { + "float16": torch.float16, + "bfloat16": torch.bfloat16, + "float32": torch.float32, + } + result = dtype_map.get(dtype_str) + if result is None: + raise ValueError(f"Unsupported dtype: {dtype_str!r}") + return result + + def _configure_kv_cache_dtype(self) -> None: + """Determine the dtype used for KV-cache storage. + + The global ``QuantizationConfig.kv_cache_dtype`` can override the + model dtype (e.g. ``fp8_e4m3`` for quantised KV caches). When set + to ``"auto"`` the model dtype is used as-is. 
+ """ + cfg = get_global_config() + kv_dtype_str = cfg.quantization.kv_cache_dtype + + if kv_dtype_str == "auto": + self.kv_cache_dtype = self.dtype + return + + kv_dtype_map = { + "float16": torch.float16, + "bfloat16": torch.bfloat16, + "fp8_e4m3": torch.float8_e4m3fn, + "fp8_e5m2": torch.float8_e5m2, + } + resolved = kv_dtype_map.get(kv_dtype_str) + if resolved is None: + logger.warning( + "Unrecognised kv_cache_dtype %r, falling back to model dtype.", + kv_dtype_str, + ) + self.kv_cache_dtype = self.dtype + else: + self.kv_cache_dtype = resolved + + logger.info("KV-cache dtype: %s", self.kv_cache_dtype) + + # ------------------------------------------------------------------ + # Model metadata + # ------------------------------------------------------------------ + + def _extract_model_metadata(self) -> None: + """Extract key model parameters from the HuggingFace config.""" + hf_config = self.model_config.hf_config + if hf_config is None: + raise RuntimeError( + "HuggingFace config not loaded. " + "Make sure model_config.hf_config is set before calling " + "initialize()." + ) + + # Handle text_config for multimodal models + text_config = getattr(hf_config, "text_config", hf_config) + + self.num_hidden_layers = getattr(text_config, "num_hidden_layers", 0) + self.num_attention_heads = getattr(text_config, "num_attention_heads", 0) + self.num_kv_heads = getattr( + text_config, + "num_key_value_heads", + self.num_attention_heads, + ) + self.head_dim = getattr( + text_config, + "head_dim", + getattr(text_config, "hidden_size", 0) // max(self.num_attention_heads, 1), + ) + self.hidden_size = getattr(text_config, "hidden_size", 0) + self.vocab_size = getattr(text_config, "vocab_size", 0) + + # V-head dim may differ from K-head dim (e.g. 
MLA) + self.v_head_dim: int = getattr(text_config, "v_head_dim", self.head_dim) + + # Context length + self.context_len = self.server_config.context_length or getattr( + text_config, "max_position_embeddings", 4096 + ) + + # Hybrid model metadata (GDN layers) + self.num_gdn_layers: int = getattr(self.model, "num_gdn_layers", 0) + self.full_attn_layer_ids: set = getattr(self.model, "full_attn_layer_ids", set()) + + logger.info( + "Model metadata: layers=%d, q_heads=%d, kv_heads=%d, " + "head_dim=%d, v_head_dim=%d, hidden=%d, vocab=%d, ctx_len=%d" + + (", gdn_layers=%d" if self.num_gdn_layers > 0 else ""), + self.num_hidden_layers, + self.num_attention_heads, + self.num_kv_heads, + self.head_dim, + self.v_head_dim, + self.hidden_size, + self.vocab_size, + self.context_len, + *([self.num_gdn_layers] if self.num_gdn_layers > 0 else []), + ) + + # ------------------------------------------------------------------ + # Model loading + # ------------------------------------------------------------------ + + def load_model(self) -> None: + """Load the model from a HuggingFace checkpoint. + + First checks the pymllm model registry for a custom implementation + that uses ``RadixAttention``. If found, instantiates it with the + HuggingFace config and loads weights via ``load_weights()``. + Otherwise falls back to ``AutoModelForCausalLM.from_pretrained``. + """ + tic = time.perf_counter() + model_path = self.server_config.model_path + + if model_path is None: + raise RuntimeError("server_config.model_path is not set.") + + before_mem = get_available_gpu_memory(self.device, self.gpu_id) + logger.info( + "Load model begin. 
path=%s, avail mem=%.2f GB", + model_path, + before_mem, + ) + + # Look up the architecture in the pymllm model registry + from pymllm.models import _MODEL_REGISTRY, get_model_class + + hf_config = self.model_config.hf_config + architectures = [] + if hf_config is not None: + architectures = getattr(hf_config, "architectures", None) or [] + + if not architectures: + supported = ", ".join(sorted(_MODEL_REGISTRY.keys())) + raise RuntimeError( + f"Cannot determine model architecture from config. " + f"Supported architectures: {supported}" + ) + + architecture = architectures[0] + model_cls = get_model_class(architecture) + if model_cls is None: + supported = ", ".join(sorted(_MODEL_REGISTRY.keys())) + raise RuntimeError( + f"Architecture {architecture!r} is not supported by pymllm. " + f"Supported architectures: {supported}" + ) + + logger.info("Using pymllm model class: %s", model_cls.__name__) + device_str = f"cuda:{self.gpu_id}" if self.device == "cuda" else self.device + # Use set_default_dtype so parameters created without explicit dtype + # get the target dtype, while parameters with explicit dtype=torch.float32 + # (e.g. A_log, dt_bias in GDN layers) stay in float32. + old_dtype = torch.get_default_dtype() + torch.set_default_dtype(self.dtype) + try: + with torch.device(device_str): + self.model = model_cls(hf_config) + finally: + torch.set_default_dtype(old_dtype) + self.model.load_weights(self._iter_weights(model_path)) + self.model.eval() + + after_mem = get_available_gpu_memory(self.device, self.gpu_id) + weight_mem = before_mem - after_mem + logger.info( + "Load model end. elapsed=%.2f s, type=%s, " + "weight_mem=%.2f GB, avail mem=%.2f GB", + time.perf_counter() - tic, + type(self.model).__name__, + weight_mem, + after_mem, + ) + + @staticmethod + def _iter_weights(model_path) -> "Generator[Tuple[str, torch.Tensor], None, None]": + """Yield ``(name, tensor)`` pairs from safetensors or ``.bin`` files. 
+ + Prefers safetensors when available; falls back to PyTorch ``.bin`` + files otherwise. + """ + import glob as _glob + from pathlib import Path + + model_path = Path(model_path) + + # Prefer safetensors + st_files = sorted(_glob.glob(str(model_path / "*.safetensors"))) + if st_files: + from safetensors.torch import load_file + + for fpath in st_files: + state_dict = load_file(fpath) + yield from state_dict.items() + del state_dict + return + + # Fallback: PyTorch .bin files + bin_files = sorted(_glob.glob(str(model_path / "*.bin"))) + for fpath in bin_files: + state_dict = torch.load(fpath, map_location="cpu", weights_only=True) + yield from state_dict.items() + del state_dict + + # ------------------------------------------------------------------ + # Memory pool initialisation + # ------------------------------------------------------------------ + + def init_memory_pool(self) -> None: + """Initialise KV-cache memory pools and request-to-token mapping. + + 1. Profiles available GPU memory to determine the maximum number of + KV-cache token slots (``max_total_num_tokens``). + 2. Derives ``max_running_requests`` from config or heuristic. + 3. Creates :class:`~pymllm.mem_cache.memory_pool.ReqToTokenPool`, + :class:`~pymllm.mem_cache.memory_pool.KVPool`, and + :class:`~pymllm.mem_cache.memory_pool.TokenToKVPoolAllocator`. + """ + logger.info("Initialising memory pools...") + + # Determine max number of tokens in KV cache + self.max_total_num_tokens = self._profile_max_num_tokens() + + # Determine max running requests + max_reqs = self.server_config.max_running_requests + if max_reqs is None: + max_reqs = min( + max( + int(self.max_total_num_tokens / self.context_len * 512), + 2048, + ), + 4096, + ) + self.max_running_requests = max_reqs + + if self.max_total_num_tokens <= 0: + raise RuntimeError( + "Not enough memory for KV cache. " + "Try reducing context_length or using a smaller model." 
+ ) + + # Create ReqToTokenPool + self.req_to_token_pool = make_req_to_token_pool( + max_reqs=self.max_running_requests, + max_context_len=self.context_len + 4, # small padding + device=self.device, + ) + + # Create KVPool + TokenToKVPoolAllocator + # Note: layer_num uses num_hidden_layers even for hybrid models + # because the KV pool is indexed by global layer_id. GDN layers' + # KV slots are allocated but unused (they use GDNPool instead). + self.token_to_kv_pool, self.token_to_kv_pool_allocator = ( + make_full_attention_net_mem_pool( + size=self.max_total_num_tokens, + layer_num=self.num_hidden_layers, + k_head_num=self.num_kv_heads, + k_head_dim=self.head_dim, + v_head_num=self.num_kv_heads, + v_head_dim=self.v_head_dim, + device=self.device, + dtype=self.kv_cache_dtype, + ) + ) + + # Create GDNPool if hybrid model with GDN layers + if self.num_gdn_layers > 0: + hf_config = self.model_config.hf_config + text_config = getattr(hf_config, "text_config", hf_config) + gdn_num_k_heads = getattr(text_config, "linear_num_key_heads", 16) + gdn_num_v_heads = getattr(text_config, "linear_num_value_heads", 32) + gdn_head_k_dim = getattr(text_config, "linear_key_head_dim", 128) + gdn_head_v_dim = getattr(text_config, "linear_value_head_dim", 128) + gdn_conv_kernel = getattr(text_config, "linear_conv_kernel_dim", 4) + gdn_conv_dim = gdn_num_k_heads * gdn_head_k_dim * 2 + gdn_num_v_heads * gdn_head_v_dim + + self.gdn_pool = GDNPool( + max_reqs=self.max_running_requests, + num_gdn_layers=self.num_gdn_layers, + num_v_heads=gdn_num_v_heads, + head_k_dim=gdn_head_k_dim, + head_v_dim=gdn_head_v_dim, + conv_dim=gdn_conv_dim, + conv_kernel_size=gdn_conv_kernel, + device=self.device, + dtype=self.dtype, + max_track_slots=self.max_running_requests, + ) + + logger.info( + "Memory pool initialised: max_tokens=%d, max_reqs=%d, kv_pool=%.2f GB" + + (", gdn_pool=%.2f GB" if self.gdn_pool is not None else ""), + self.max_total_num_tokens, + self.max_running_requests, + 
self.token_to_kv_pool._mem_bytes() / (1 << 30), + *([self.gdn_pool.mem_bytes() / (1 << 30)] if self.gdn_pool is not None else []), + ) + + def _profile_max_num_tokens(self) -> int: + """Profile available memory to determine maximum KV-cache tokens. + + If ``server_config.max_total_tokens`` is explicitly set that value + is used directly. Otherwise a memory-fraction-based heuristic + similar to sglang's ``profile_max_num_token`` is applied. + """ + # If user explicitly set max_total_tokens, use that. + if self.server_config.max_total_tokens is not None: + return self.server_config.max_total_tokens + + if self.device != "cuda": + # For CPU, use a conservative default. + return 4096 + + available_gb = get_available_gpu_memory(self.device, self.gpu_id) + + # Determine memory fraction for static allocation (KV cache). + mem_fraction = self.server_config.mem_fraction_static + if mem_fraction is None: + mem_fraction = 0.85 # default: use 85% of remaining memory + + # Calculate per-token KV cache size in bytes. 
+ kv_element_size = torch.tensor([], dtype=self.kv_cache_dtype).element_size() + cell_size = ( + self.num_kv_heads + * (self.head_dim + self.v_head_dim) # K + V + * self.num_hidden_layers + * kv_element_size + ) + + if cell_size == 0: + logger.warning( + "cell_size is 0 (model metadata may be incomplete); " + "using default max_total_num_tokens=4096" + ) + return 4096 + + rest_memory_bytes = int(available_gb * mem_fraction * (1 << 30)) + + # Reserve memory for GDN pool if hybrid model + if self.num_gdn_layers > 0: + hf_config = self.model_config.hf_config + text_config = getattr(hf_config, "text_config", hf_config) + gdn_num_k_heads = getattr(text_config, "linear_num_key_heads", 16) + gdn_num_v_heads = getattr(text_config, "linear_num_value_heads", 32) + gdn_head_k_dim = getattr(text_config, "linear_key_head_dim", 128) + gdn_head_v_dim = getattr(text_config, "linear_value_head_dim", 128) + gdn_conv_kernel = getattr(text_config, "linear_conv_kernel_dim", 4) + gdn_conv_dim = gdn_num_k_heads * gdn_head_k_dim * 2 + gdn_num_v_heads * gdn_head_v_dim + + # Estimate GDN pool memory for max_running_requests + # Track slots add max_reqs_est extra slots for prefix cache snapshots + max_reqs_est = min( + max(int(rest_memory_bytes / cell_size / self.context_len * 512), 2048), + 4096, + ) if self.server_config.max_running_requests is None else self.server_config.max_running_requests + pool_size = max_reqs_est + 1 + max_reqs_est # +track_slots + recurrent_bytes = ( + self.num_gdn_layers * pool_size * gdn_num_v_heads + * gdn_head_v_dim * gdn_head_k_dim * 4 # float32 + ) + dtype_size = torch.tensor([], dtype=self.dtype).element_size() + conv_bytes = ( + self.num_gdn_layers * pool_size * gdn_conv_dim + * (gdn_conv_kernel - 1) * dtype_size + ) + gdn_pool_bytes = recurrent_bytes + conv_bytes + rest_memory_bytes -= gdn_pool_bytes + logger.info( + "GDN pool memory reservation: %.2f GB", + gdn_pool_bytes / (1 << 30), + ) + + max_num_tokens = rest_memory_bytes // cell_size + + 
logger.info( + "Memory profiling: avail=%.2f GB, fraction=%.2f, " + "cell_size=%d bytes, max_tokens=%d", + available_gb, + mem_fraction, + cell_size, + max_num_tokens, + ) + + return max(max_num_tokens, 1) # at least 1 + + # ------------------------------------------------------------------ + # Attention backend + # ------------------------------------------------------------------ + + def init_attention_backend(self) -> None: + """Initialise the attention backend. + + Creates a :class:`FlashInferAttnBackend` for standard models, or a + :class:`HybridAttnBackend` (FlashInfer + GDN) for hybrid models. + """ + from pymllm.layers.attention.flashinfer_backend import FlashInferAttnBackend + + logger.info("Initialising attention backend...") + + flash_backend = FlashInferAttnBackend( + num_heads=self.num_attention_heads, + num_kv_heads=self.num_kv_heads, + head_dim=self.head_dim, + kv_cache_dtype=self.kv_cache_dtype, + q_dtype=self.dtype, + max_context_len=self.context_len, + req_to_token=self.req_to_token_pool.req_to_token, + device=torch.device(self.device), + max_req_pool_size=self.req_to_token_pool.size, + ) + + if self.gdn_pool is not None: + from pymllm.layers.attention.gdn_backend import GDNAttnBackend + from pymllm.layers.attention.hybrid_backend import HybridAttnBackend + + gdn_backend = GDNAttnBackend( + gdn_pool=self.gdn_pool, + device=torch.device(self.device), + ) + self.attn_backend = HybridAttnBackend( + full_attn_backend=flash_backend, + gdn_backend=gdn_backend, + full_attn_layer_ids=self.full_attn_layer_ids, + ) + else: + self.attn_backend = flash_backend + + logger.info( + "Attention backend: %s", + type(self.attn_backend).__name__, + ) + + # ------------------------------------------------------------------ + # Warmup + # ------------------------------------------------------------------ + + def _init_cublas(self) -> None: + """Run a small matmul to initialise cuBLAS. 
+ + Without this, the first real matmul may incur a significant + initialisation overhead. + """ + dtype = torch.float16 + device = "cuda" + a = torch.ones((16, 16), dtype=dtype, device=device) + b = torch.ones((16, 16), dtype=dtype, device=device) + _ = a @ b + + # ------------------------------------------------------------------ + # CUDA graph capture + # ------------------------------------------------------------------ + + def init_cuda_graphs(self) -> None: + """Capture CUDA graphs for decode-step acceleration. + + Skipped when: + * The device is not CUDA. + * ``server_config.disable_cuda_graph`` is ``True``. + * The model is not a generation model. + """ + self.graph_runner = None + + if self.device != "cuda": + return + if self.server_config.disable_cuda_graph: + logger.info("CUDA graphs disabled by config.") + return + if not self.is_generation: + return + + from pymllm.executor.cuda_graph_runner import CudaGraphRunner + + tic = time.perf_counter() + before_mem = get_available_gpu_memory(self.device, self.gpu_id) + logger.info("Capturing CUDA graphs... avail mem=%.2f GB", before_mem) + + self.graph_runner = CudaGraphRunner(self) + + after_mem = get_available_gpu_memory(self.device, self.gpu_id) + logger.info( + "CUDA graph capture complete. elapsed=%.2f s, " + "mem usage=%.2f GB, avail mem=%.2f GB", + time.perf_counter() - tic, + before_mem - after_mem, + after_mem, + ) + + # ------------------------------------------------------------------ + # ForwardBatch construction + # ------------------------------------------------------------------ + + def prepare_forward_batch_extend( + self, + input_ids: torch.Tensor, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + extend_seq_lens: torch.Tensor, + extend_prefix_lens: torch.Tensor, + out_cache_loc: torch.Tensor, + return_logprob: bool = False, + top_logprobs_nums: Optional[List[int]] = None, + ) -> ForwardBatch: + """Build a :class:`ForwardBatch` for an extend (prefill) pass. 
+ + Parameters + ---------- + input_ids + Token IDs for all new tokens, shape ``[total_new_tokens]``. + req_pool_indices + Index of each request in ``ReqToTokenPool``, + shape ``[batch_size]``. + seq_lens + Total (prefix + new) length of each sequence, + shape ``[batch_size]``. + extend_seq_lens + Number of new tokens per sequence, shape ``[batch_size]``. + extend_prefix_lens + Cached prefix length per sequence, shape ``[batch_size]``. + out_cache_loc + KV-pool slot indices for each new token, + shape ``[total_new_tokens]``. + return_logprob + Whether to return per-token log-probabilities. + top_logprobs_nums + Number of top log-probs per sequence. + """ + batch_size = req_pool_indices.shape[0] + seq_lens_sum = int(seq_lens.sum().item()) + extend_num_tokens = int(extend_seq_lens.sum().item()) + + # Compute positions for each token + positions = _compute_positions(extend_seq_lens, extend_prefix_lens) + + # Compute extend_start_loc (exclusive cumsum of extend_seq_lens) + extend_start_loc = torch.zeros( + batch_size, dtype=torch.int32, device=self.device + ) + if batch_size > 1: + extend_start_loc[1:] = torch.cumsum(extend_seq_lens[:-1], dim=0).to( + torch.int32 + ) + + return ForwardBatch( + forward_mode=ForwardMode.EXTEND, + batch_size=batch_size, + input_ids=input_ids, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + out_cache_loc=out_cache_loc, + seq_lens_sum=seq_lens_sum, + seq_lens_cpu=seq_lens.cpu(), + positions=positions, + extend_num_tokens=extend_num_tokens, + extend_seq_lens=extend_seq_lens, + extend_prefix_lens=extend_prefix_lens, + extend_start_loc=extend_start_loc, + extend_prefix_lens_cpu=extend_prefix_lens.tolist(), + extend_seq_lens_cpu=extend_seq_lens.tolist(), + return_logprob=return_logprob, + top_logprobs_nums=top_logprobs_nums, + req_to_token_pool=self.req_to_token_pool, + token_to_kv_pool=self.token_to_kv_pool, + attn_backend=self.attn_backend, + ) + + def prepare_forward_batch_decode( + self, + input_ids: torch.Tensor, + 
req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + out_cache_loc: torch.Tensor, + return_logprob: bool = False, + top_logprobs_nums: Optional[List[int]] = None, + mrope_position_deltas: Optional[torch.Tensor] = None, + ) -> ForwardBatch: + """Build a :class:`ForwardBatch` for a decode step. + + Parameters + ---------- + input_ids + Token IDs (one per sequence), shape ``[batch_size]``. + req_pool_indices + Index of each request in ``ReqToTokenPool``, + shape ``[batch_size]``. + seq_lens + Total sequence length of each request, shape ``[batch_size]``. + out_cache_loc + KV-pool slot for each sequence's new token, + shape ``[batch_size]``. + return_logprob + Whether to return per-token log-probabilities. + top_logprobs_nums + Number of top log-probs per sequence. + mrope_position_deltas + Per-request M-RoPE position deltas, shape ``[batch_size]`` (int64). + Used by multimodal models (e.g. Qwen3-VL) to offset decode-step + positions by the spatial extent of prefill images. + """ + batch_size = req_pool_indices.shape[0] + seq_lens_sum = int(seq_lens.sum().item()) + + # For decode, positions = seq_lens - 1 (the new token position) + positions = (seq_lens - 1).to(torch.int64) + + return ForwardBatch( + forward_mode=ForwardMode.DECODE, + batch_size=batch_size, + input_ids=input_ids, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + out_cache_loc=out_cache_loc, + seq_lens_sum=seq_lens_sum, + seq_lens_cpu=seq_lens.cpu(), + positions=positions, + return_logprob=return_logprob, + top_logprobs_nums=top_logprobs_nums, + req_to_token_pool=self.req_to_token_pool, + token_to_kv_pool=self.token_to_kv_pool, + attn_backend=self.attn_backend, + mrope_position_deltas=mrope_position_deltas, + ) + + # ------------------------------------------------------------------ + # Forward pass + # ------------------------------------------------------------------ + + def forward( + self, + forward_batch: ForwardBatch, + ) -> LogitsProcessorOutput: + """Run a forward pass through 
the model. + + Dispatches to the appropriate method based on the batch's + :attr:`~pymllm.engine.forward_batch.ForwardMode`. For decode + batches, automatically uses CUDA-graph replay when a captured + graph is available. + + Parameters + ---------- + forward_batch + The prepared batch (from ``prepare_forward_batch_*``). + + Returns + ------- + LogitsProcessorOutput + Contains ``next_token_logits`` of shape + ``[batch_size, vocab_size]``. + """ + self.forward_pass_id += 1 + + if forward_batch.forward_mode.is_idle(): + return self._forward_idle(forward_batch) + + # Try CUDA graph replay for decode batches. + if ( + forward_batch.forward_mode.is_decode() + and self.graph_runner is not None + and self.graph_runner.can_run(forward_batch) + ): + return self.graph_runner.replay(forward_batch) + + if forward_batch.forward_mode.is_decode(): + return self.forward_decode(forward_batch) + elif forward_batch.forward_mode.is_extend(): + return self.forward_extend(forward_batch) + else: + raise ValueError(f"Unsupported forward mode: {forward_batch.forward_mode}") + + def forward_decode( + self, + forward_batch: ForwardBatch, + ) -> LogitsProcessorOutput: + """Run a decode forward pass (one new token per sequence). + + Calls ``attn_backend.init_forward_metadata`` followed by + ``model.forward``. + """ + self.attn_backend.init_forward_metadata(forward_batch) + model_output = self.model.forward( + forward_batch.input_ids, + forward_batch.positions, + forward_batch, + ) + return self._process_logits(model_output, forward_batch) + + def forward_extend( + self, + forward_batch: ForwardBatch, + ) -> LogitsProcessorOutput: + """Run an extend (prefill) forward pass. + + Calls ``attn_backend.init_forward_metadata`` followed by + ``model.forward``. 
+ """ + self.attn_backend.init_forward_metadata(forward_batch) + model_output = self.model.forward( + forward_batch.input_ids, + forward_batch.positions, + forward_batch, + ) + return self._process_logits(model_output, forward_batch) + + def _forward_idle( + self, + forward_batch: ForwardBatch, + ) -> LogitsProcessorOutput: + """Return empty logits for an idle batch (no sequences to process).""" + return LogitsProcessorOutput( + next_token_logits=torch.empty( + (0, self.vocab_size), + dtype=self.dtype, + device=self.device, + ), + ) + + # ------------------------------------------------------------------ + # Logits post-processing + # ------------------------------------------------------------------ + + def _process_logits( + self, + model_output: Any, + forward_batch: ForwardBatch, + ) -> LogitsProcessorOutput: + """Extract last-token logits from model output. + + Handles: + * A :class:`LogitsProcessorOutput` returned by custom model + implementations. + * A ``CausalLMOutput`` (from HuggingFace ``transformers``) with a + ``.logits`` attribute. + * A raw ``torch.Tensor`` of logits. + """ + if isinstance(model_output, LogitsProcessorOutput): + return model_output + + # Standard HuggingFace output + if hasattr(model_output, "logits"): + logits = model_output.logits + elif isinstance(model_output, torch.Tensor): + logits = model_output + else: + raise TypeError( + f"Unexpected model output type: {type(model_output)}. " + "Expected torch.Tensor or an object with .logits attribute." 
+ ) + + # --- Decode: logits is [bs, 1, vocab] or [bs, vocab] --- + if forward_batch.forward_mode.is_decode(): + if logits.dim() == 3: + next_token_logits = logits[:, -1, :] + else: + next_token_logits = logits + else: + # --- Extend: pick the last token of each sequence --- + next_token_logits = self._gather_last_token_logits(logits, forward_batch) + + return LogitsProcessorOutput(next_token_logits=next_token_logits) + + def _gather_last_token_logits( + self, + logits: torch.Tensor, + forward_batch: ForwardBatch, + ) -> torch.Tensor: + """Gather the logits of the last token in each sequence for extend. + + During extend, the model processes all tokens but we only need the + logits at the last position of each sequence for next-token sampling. + """ + if logits.dim() == 3: + # [batch_size, seq_len, vocab_size] from standard HF model + return logits[:, -1, :] + + # Flat layout [total_tokens, vocab_size] + if ( + forward_batch.extend_start_loc is not None + and forward_batch.extend_seq_lens is not None + ): + last_indices = ( + forward_batch.extend_start_loc + forward_batch.extend_seq_lens - 1 + ).long() + return logits[last_indices] + + # Fallback: last row + return logits[-1:, :] + + # ------------------------------------------------------------------ + # Sampling + # ------------------------------------------------------------------ + + def sample( + self, + logits_output: LogitsProcessorOutput, + forward_batch: ForwardBatch, + temperatures: Optional[torch.Tensor] = None, + top_ps: Optional[torch.Tensor] = None, + top_ks: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """Sample next-token IDs from logits. + + Supports per-request temperature, top-p, and top-k. + + Parameters + ---------- + logits_output + The logits from :meth:`forward`. + forward_batch + The current forward batch. + temperatures + Per-request temperature, shape ``[batch_size]``. + top_ps + Per-request top-p, shape ``[batch_size]``. + top_ks + Per-request top-k, shape ``[batch_size]``. 
+ + Returns + ------- + torch.Tensor + Next-token IDs, shape ``[batch_size]``, dtype ``int32``. + """ + from pymllm.layers.sampling import ( + sampling_from_probs, + softmax, + top_k_top_p_sampling_from_probs, + ) + + logits = logits_output.next_token_logits + + if logits.numel() == 0: + return torch.empty(0, dtype=torch.int32, device=self.device) + + # Greedy path: temperature=0 (or all zeros) → argmax, no sampling. + if temperatures is not None: + all_greedy = bool((temperatures < 1e-6).all()) + else: + all_greedy = False + + if all_greedy: + return logits.argmax(dim=-1).to(torch.int32) + + # Stochastic path: apply temperature then sample. + if temperatures is not None: + probs = softmax(logits, temperature=temperatures) + else: + probs = torch.softmax(logits.float(), dim=-1) + + # Apply top-k / top-p sampling if specified + has_top_k = top_ks is not None + has_top_p = top_ps is not None + + if has_top_k or has_top_p: + k = top_ks if has_top_k else logits.shape[-1] + p = top_ps if has_top_p else 1.0 + next_token_ids = top_k_top_p_sampling_from_probs(probs, k, p) + else: + next_token_ids = sampling_from_probs(probs) + + return next_token_ids + + # ------------------------------------------------------------------ + # Cleanup + # ------------------------------------------------------------------ + + def shutdown(self) -> None: + """Release model and memory resources.""" + logger.info("ModelRunner shutting down...") + + if self.graph_runner is not None: + self.graph_runner.shutdown() + self.graph_runner = None + if self.model is not None: + del self.model + self.model = None + if self.token_to_kv_pool is not None: + del self.token_to_kv_pool + self.token_to_kv_pool = None + if self.token_to_kv_pool_allocator is not None: + del self.token_to_kv_pool_allocator + self.token_to_kv_pool_allocator = None + if self.gdn_pool is not None: + del self.gdn_pool + self.gdn_pool = None + if self.req_to_token_pool is not None: + del self.req_to_token_pool + self.req_to_token_pool 
= None + self.attn_backend = None + + if self.device == "cuda": + torch.cuda.empty_cache() + gc.collect() + + logger.info("ModelRunner shutdown complete.") + + # ------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------ + + @property + def is_generation(self) -> bool: + """True if the model is a generation (causal-LM) model.""" + return True + + @property + def sliding_window_size(self) -> Optional[int]: + """Sliding-window attention span, or ``None`` for full context.""" + hf_config = self.model_config.hf_config + if hf_config is None: + return None + text_config = getattr(hf_config, "text_config", hf_config) + return getattr(text_config, "sliding_window", None) + + +# --------------------------------------------------------------------------- +# Utility functions +# --------------------------------------------------------------------------- + + +def _compute_positions( + extend_seq_lens: torch.Tensor, + extend_prefix_lens: torch.Tensor, +) -> torch.Tensor: + """Compute per-token positions for an extend batch. + + For each sequence, positions are + ``[prefix_len, prefix_len+1, ..., prefix_len+seq_len-1]``. + The result is a flat 1-D tensor of shape ``[sum(extend_seq_lens)]``. 
+ """ + device = extend_seq_lens.device + batch_size = extend_seq_lens.shape[0] + total_tokens = int(extend_seq_lens.sum().item()) + + if total_tokens == 0: + return torch.empty(0, dtype=torch.int64, device=device) + + positions = torch.empty(total_tokens, dtype=torch.int64, device=device) + offset = 0 + for i in range(batch_size): + seq_len = int(extend_seq_lens[i].item()) + prefix_len = int(extend_prefix_lens[i].item()) + if seq_len > 0: + positions[offset : offset + seq_len] = torch.arange( + prefix_len, + prefix_len + seq_len, + dtype=torch.int64, + device=device, + ) + offset += seq_len + + return positions diff --git a/pymllm/layers/__init__.py b/pymllm/layers/__init__.py index 97cfb921..2ecb1396 100644 --- a/pymllm/layers/__init__.py +++ b/pymllm/layers/__init__.py @@ -6,9 +6,12 @@ from pymllm.layers.linear import ColumnParallelLinear, Linear, RowParallelLinear from pymllm.layers.mlp import MLP, ParallelMLP from pymllm.layers.rms_norm import GemmaRMSNorm, RMSNorm +from pymllm.layers.rms_norm_gated import RMSNormGated +from pymllm.layers.gated_delta_net import GatedDeltaNet from pymllm.layers.rope import ( apply_llama31_rope, apply_llama31_rope_pos_ids, + apply_mrope, apply_rope, apply_rope_pos_ids, apply_rope_with_cos_sin_cache, @@ -41,6 +44,7 @@ "LayerNorm", "RMSNorm", "GemmaRMSNorm", + "apply_mrope", "apply_rope", "apply_llama31_rope", "apply_rope_pos_ids", diff --git a/pymllm/layers/attention/__init__.py b/pymllm/layers/attention/__init__.py index 5d0dbf07..ae187975 100644 --- a/pymllm/layers/attention/__init__.py +++ b/pymllm/layers/attention/__init__.py @@ -8,7 +8,10 @@ WrapperDispatch, should_use_tensor_core, ) +from pymllm.layers.attention.gdn_backend import GDNAttnBackend +from pymllm.layers.attention.hybrid_backend import HybridAttnBackend from pymllm.layers.attention.radix_attention import AttentionType, RadixAttention +from pymllm.layers.attention.radix_linear_attention import RadixLinearAttention __all__ = [ # Base @@ -16,10 +19,15 @@ # 
RadixAttention "AttentionType", "RadixAttention", + # RadixLinearAttention (GDN) + "RadixLinearAttention", # FlashInfer backend "FlashInferAttnBackend", "DecodeMetadata", "PrefillMetadata", "WrapperDispatch", "should_use_tensor_core", + # GDN + Hybrid backends + "GDNAttnBackend", + "HybridAttnBackend", ] diff --git a/pymllm/layers/attention/attention_backend.py b/pymllm/layers/attention/attention_backend.py index 07e2f6a1..fe168c2d 100644 --- a/pymllm/layers/attention/attention_backend.py +++ b/pymllm/layers/attention/attention_backend.py @@ -103,6 +103,28 @@ def forward( q, k, v, layer, forward_batch, save_kv_cache=save_kv_cache, **kwargs ) + # ------------------------------------------------------------------ + # GDN linear-attention interface (used by HybridAttnBackend) + # ------------------------------------------------------------------ + + def forward_gdn( + self, + layer: "RadixLinearAttention", + forward_batch: "ForwardBatch", + mixed_qkv: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + ) -> torch.Tensor: + """Run GDN linear-attention for one layer. + + Only implemented by backends that support hybrid (full + GDN) + architectures. The default raises ``NotImplementedError``. + """ + raise NotImplementedError( + f"{type(self).__name__} does not support GDN linear attention. " + "Use HybridAttnBackend for hybrid full+GDN models." + ) + # ------------------------------------------------------------------ # Optional CUDA-graph interface # ------------------------------------------------------------------ diff --git a/pymllm/layers/attention/gdn_backend.py b/pymllm/layers/attention/gdn_backend.py new file mode 100644 index 00000000..2b6e27b4 --- /dev/null +++ b/pymllm/layers/attention/gdn_backend.py @@ -0,0 +1,660 @@ +"""GDN attention backend -- pooled-state GDN computation for hybrid models. + +Performs GDN (Gated Delta Net) linear-attention using externalized state +stored in a :class:`~pymllm.mem_cache.memory_pool.GDNPool`. 
Supports +both extend (prefill) and decode paths with FlashInfer kernels. + +This backend is not used directly; it is wrapped by +:class:`~pymllm.layers.attention.hybrid_backend.HybridAttnBackend`. +""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Optional, Tuple + +import torch +import torch.nn.functional as F + +if TYPE_CHECKING: + from pymllm.engine.forward_batch import ForwardBatch + from pymllm.layers.attention.radix_linear_attention import RadixLinearAttention + from pymllm.mem_cache.memory_pool import GDNPool + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Server config: gdn_decode_backend override +# --------------------------------------------------------------------------- + + +def _get_gdn_decode_backend_override() -> str: + """Read ``server.gdn_decode_backend`` from GlobalConfig. + + Returns one of: ``"auto"``, ``"flashinfer"``, ``"mllm_kernel"``, ``"pytorch"``. 
+ """ + try: + from pymllm.configs import get_global_config + return get_global_config().server.gdn_decode_backend + except Exception: + return "auto" + + +# --------------------------------------------------------------------------- +# mllm-kernel GDN decode (lazy import, SM80+) +# --------------------------------------------------------------------------- + +_mllm_gdn_decode = None + + +def _get_mllm_gdn_decode(): + """Lazy import for mllm-kernel fused GDN decode CUDA kernel.""" + global _mllm_gdn_decode + if _mllm_gdn_decode is None: + try: + from mllm_kernel.cuda.jit.gdn_decode import gdn_decode + + _mllm_gdn_decode = gdn_decode + logger.info("GDNAttnBackend: [probe] mllm-kernel GDN decode available (SM80+)") + except (ImportError, RuntimeError) as e: + logger.info("GDNAttnBackend: [probe] mllm-kernel GDN decode not available: %s", e) + _mllm_gdn_decode = False + return _mllm_gdn_decode if _mllm_gdn_decode is not False else None + + +# --------------------------------------------------------------------------- +# FlashInfer GDN kernel (lazy import) +# --------------------------------------------------------------------------- + +_flashinfer_available: Optional[bool] = None +_fi_chunk_gated_delta_rule = None +_fi_gated_delta_rule_decode = None + + +def _get_flashinfer_gdn(): + """Lazy import for FlashInfer GDN kernels (prefill + decode).""" + global _flashinfer_available, _fi_chunk_gated_delta_rule, _fi_gated_delta_rule_decode + if _flashinfer_available is None: + try: + os.environ.setdefault("FLASHINFER_DISABLE_VERSION_CHECK", "1") + _flashinfer_available = ( + torch.cuda.is_available() + and torch.cuda.get_device_capability()[0] >= 9 + ) + if not _flashinfer_available: + logger.info( + "GDNAttnBackend: [probe] FlashInfer GDN not available (requires SM90+, " + "current SM%d%d)", *torch.cuda.get_device_capability() + ) + return _flashinfer_available, None, None + + from flashinfer.gdn_prefill import chunk_gated_delta_rule + _fi_chunk_gated_delta_rule = 
chunk_gated_delta_rule + + try: + from flashinfer.gdn_decode import gated_delta_rule_decode_pretranspose + _fi_gated_delta_rule_decode = gated_delta_rule_decode_pretranspose + logger.info("GDNAttnBackend: [probe] FlashInfer GDN available (prefill + decode)") + except ImportError: + logger.info( + "GDNAttnBackend: [probe] FlashInfer GDN partially available " + "(prefill only, decode not found)" + ) + except (ImportError, RuntimeError) as e: + logger.info( + "GDNAttnBackend: [probe] FlashInfer GDN not available: %s", e + ) + _flashinfer_available = False + return _flashinfer_available, _fi_chunk_gated_delta_rule, _fi_gated_delta_rule_decode + + +# --------------------------------------------------------------------------- +# GDN gating computation +# --------------------------------------------------------------------------- + + +def _gdn_gating( + a: torch.Tensor, + b: torch.Tensor, + A_log: torch.Tensor, + dt_bias: torch.Tensor, +) -> Tuple[torch.Tensor, torch.Tensor]: + """Compute GDN gating factors. + + Returns + ------- + g : log-space decay factor: -exp(A_log) * softplus(a + dt_bias) + beta : update gate: sigmoid(b) + """ + g = -torch.exp(A_log) * F.softplus(a + dt_bias) + beta = torch.sigmoid(b) + return g, beta + + +# --------------------------------------------------------------------------- +# Forward metadata +# --------------------------------------------------------------------------- + + +@dataclass +class GDNForwardMetadata: + """Per-batch metadata for GDN backend.""" + + cache_indices: torch.Tensor # [batch_size] = req_pool_indices + cu_seqlens: Optional[torch.Tensor] = None # extend only + + +# --------------------------------------------------------------------------- +# GDNAttnBackend +# --------------------------------------------------------------------------- + + +class GDNAttnBackend: + """GDN linear-attention backend using pooled states. + + Handles both extend (prefill) and decode paths for GDN layers. 
+ Uses FlashInfer kernels when available (SM90+), with PyTorch fallback. + + Parameters + ---------- + gdn_pool + Pre-allocated :class:`~pymllm.mem_cache.memory_pool.GDNPool`. + device + Target device. + """ + + def __init__(self, gdn_pool: "GDNPool", device: torch.device): + self.gdn_pool = gdn_pool + self.device = device + self.forward_metadata: Optional[GDNForwardMetadata] = None + + # Pre-check FlashInfer availability + self._use_flashinfer, _, _ = _get_flashinfer_gdn() + + # One-shot flags to log the selected backend on first actual forward call + self._decode_backend_logged = False + self._extend_backend_logged = False + + def init_forward_metadata(self, forward_batch: "ForwardBatch") -> None: + """Prepare GDN metadata from the current forward batch.""" + cache_indices = forward_batch.req_pool_indices.to(torch.int64) + + cu_seqlens = None + if forward_batch.forward_mode.is_extend(): + # Build cu_seqlens from extend_seq_lens + if forward_batch.extend_seq_lens is not None: + seq_lens = forward_batch.extend_seq_lens.to(torch.int64) + cu_seqlens = torch.zeros( + len(seq_lens) + 1, + dtype=torch.int64, + device=self.device, + ) + torch.cumsum(seq_lens, dim=0, out=cu_seqlens[1:]) + + self.forward_metadata = GDNForwardMetadata( + cache_indices=cache_indices, + cu_seqlens=cu_seqlens, + ) + + # ------------------------------------------------------------------ + # CUDA-graph interface + # ------------------------------------------------------------------ + + def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int) -> None: + """Allocate CUDA-graph state for GDN backend. + + The GDN pool buffers are already pre-allocated at fixed addresses, + so we only need to allocate the metadata tensor. 
+ """ + self._cuda_graph_cache_indices = torch.zeros( + (max_bs,), dtype=torch.int64, device=self.device + ) + + def init_forward_metadata_capture_cuda_graph( + self, + bs: int, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + ) -> None: + """Set up GDN metadata for CUDA-graph capture (decode only).""" + self._cuda_graph_cache_indices[:bs].copy_( + req_pool_indices[:bs].to(torch.int64) + ) + self.forward_metadata = GDNForwardMetadata( + cache_indices=self._cuda_graph_cache_indices[:bs], + ) + + def init_forward_metadata_replay_cuda_graph( + self, + bs: int, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + ) -> None: + """Update GDN metadata for CUDA-graph replay (decode only).""" + self._cuda_graph_cache_indices[:bs].copy_( + req_pool_indices[:bs].to(torch.int64) + ) + self.forward_metadata = GDNForwardMetadata( + cache_indices=self._cuda_graph_cache_indices[:bs], + ) + + # ------------------------------------------------------------------ + # Forward: decode + # ------------------------------------------------------------------ + + def forward_decode( + self, + layer: "RadixLinearAttention", + forward_batch: "ForwardBatch", + mixed_qkv: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + ) -> torch.Tensor: + """GDN decode: one new token per request. + + Steps: + 1. Gather conv_state from pool → [bs, conv_dim, K-1] + 2. Conv1d update: shift + weighted sum for 1 new token + 3. Scatter updated conv_state back to pool + 4. SiLU → split q,k,v + 5. 
FlashInfer gated_delta_rule_decode (or PyTorch fallback) + """ + metadata = self.forward_metadata + cache_indices = metadata.cache_indices + gdn_idx = layer.gdn_layer_idx + bs = mixed_qkv.shape[0] + + recurrent_buf, conv_buf = self.gdn_pool.get_layer_state(gdn_idx) + conv_weight = layer.conv_weight # [conv_dim, kernel_size] + K = conv_weight.shape[1] + + # --- Conv1d decode: single-token update --- + conv_state = conv_buf[cache_indices] # [bs, conv_dim, K-1] + x = mixed_qkv.unsqueeze(-1) # [bs, conv_dim, 1] + + new_conv_state = torch.cat([conv_state[:, :, 1:], x], dim=-1) + full_window = torch.cat([conv_state, x], dim=-1) # [bs, conv_dim, K] + conv_out = (full_window * conv_weight.unsqueeze(0)).sum(dim=-1) + + conv_buf[cache_indices] = new_conv_state + + # --- SiLU activation --- + conv_out = F.silu(conv_out) + + # --- Split q, k, v --- + key_dim = layer.num_k_heads * layer.head_k_dim + value_dim = layer.num_v_heads * layer.head_v_dim + q, k, v = conv_out.split([key_dim, key_dim, value_dim], dim=-1) + q = q.view(bs, layer.num_k_heads, layer.head_k_dim) + k = k.view(bs, layer.num_k_heads, layer.head_k_dim) + v = v.view(bs, layer.num_v_heads, layer.head_v_dim) + + # --- Recurrent update --- + # Priority (when "auto"): FlashInfer SM90+ > mllm-kernel SM80+ > PyTorch + # Can be overridden via --server.gdn_decode_backend + backend = _get_gdn_decode_backend_override() + use_fi, _, fi_decode = _get_flashinfer_gdn() + mllm_gdn = _get_mllm_gdn_decode() + + use_flashinfer = ( + (backend in ("auto", "flashinfer")) + and use_fi and fi_decode is not None + and mixed_qkv.is_cuda + ) + use_mllm = ( + (backend in ("auto", "mllm_kernel")) + and not (backend == "auto" and use_flashinfer) + and mllm_gdn is not None + and mixed_qkv.is_cuda + ) + + if backend == "flashinfer" and not use_flashinfer: + logger.warning("GDNAttnBackend: gdn_decode_backend='flashinfer' requested but unavailable, falling back") + if backend == "mllm_kernel" and mllm_gdn is None: + 
logger.warning("GDNAttnBackend: gdn_decode_backend='mllm_kernel' requested but unavailable, falling back") + + if not self._decode_backend_logged: + if use_flashinfer: + selected = "flashinfer" + elif use_mllm: + selected = "mllm_kernel" + else: + selected = "pytorch" + logger.info( + "GDNAttnBackend: [decode] using backend=%s (config=%s)", selected, backend + ) + self._decode_backend_logged = True + + if use_flashinfer: + # FlashInfer decode (SM90+) + query_fi = q.unsqueeze(1) + key_fi = k.unsqueeze(1) + value_fi = v.unsqueeze(1) + a_fi = a.unsqueeze(1) + b_fi = b.unsqueeze(1) + + state_batch = recurrent_buf[cache_indices] + + output_fi, new_state = fi_decode( + q=query_fi, k=key_fi, v=value_fi, + state=state_batch, + A_log=layer.A_log.detach(), + a=a_fi, dt_bias=layer.dt_bias.detach(), b=b_fi, + scale=None, output=None, use_qk_l2norm=True, + ) + + recurrent_buf[cache_indices] = new_state + output = output_fi.squeeze(1) + + elif use_mllm: + # mllm-kernel fused CUDA decode (SM80+) + output = mllm_gdn( + q, k, v, a, b, + layer.A_log, layer.dt_bias, + recurrent_buf, cache_indices, + ) + + else: + # PyTorch fallback + g, beta = _gdn_gating(a, b, layer.A_log, layer.dt_bias) + output = self._decode_pytorch_fallback( + q, k, v, g, beta, recurrent_buf, cache_indices, layer + ) + + return output.reshape(bs, value_dim) + + def _decode_pytorch_fallback( + self, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + beta: torch.Tensor, + recurrent_buf: torch.Tensor, + cache_indices: torch.Tensor, + layer: "RadixLinearAttention", + ) -> torch.Tensor: + """Pure PyTorch decode fallback for GDN with delta rule and L2 norm. 
+ + Matches the sglang Triton kernel (fused_sigmoid_gating_delta_rule_update): + state *= exp(g) # decay + v_delta = v - state @ k # delta rule + v_delta *= beta # gating + state += v_delta outer k # state update + output = state @ q # readout + """ + bs = q.shape[0] + num_v_heads = layer.num_v_heads + num_k_heads = layer.num_k_heads + + # GQA: expand k/q heads to match v heads + if num_k_heads != num_v_heads: + repeats = num_v_heads // num_k_heads + q = q.repeat_interleave(repeats, dim=1) + k = k.repeat_interleave(repeats, dim=1) + + # All computation in float32 (state is float32, avoids dtype mismatch) + orig_dtype = q.dtype + q = q.float() + k = k.float() + v = v.float() + + # L2 normalize q and k per-head (matching use_qk_l2norm_in_kernel=True) + q = q / (q.norm(dim=-1, keepdim=True) + 1e-6) + k = k / (k.norm(dim=-1, keepdim=True) + 1e-6) + + decay = torch.exp(g.float()) # [bs, num_v_heads] + beta_f = beta.float() # [bs, num_v_heads] + + outputs = [] + for i in range(bs): + idx = cache_indices[i] + state = recurrent_buf[idx] # [H, V, K] float32 + + # Decay + state = state * decay[i].unsqueeze(-1).unsqueeze(-1) + + k_i = k[i] # [H, K] + v_i = v[i] # [H, V] + b_i = beta_f[i] # [H] + q_i = q[i] # [H, K] + + # Delta rule: v_delta = v - state @ k + v_delta = v_i - torch.bmm(state, k_i.unsqueeze(-1)).squeeze(-1) + v_delta = v_delta * b_i.unsqueeze(-1) # gating + + # State update: state += v_delta ⊗ k (outer product in [V, K] layout) + state = state + v_delta.unsqueeze(-1) * k_i.unsqueeze(-2) + recurrent_buf[idx] = state + + # Output: o = state @ q + o_t = torch.bmm(state, q_i.unsqueeze(-1)).squeeze(-1) # [H, V] + outputs.append(o_t) + + return torch.stack(outputs, dim=0).to(orig_dtype) # [bs, H, V] + + # ------------------------------------------------------------------ + # Forward: extend (prefill) + # ------------------------------------------------------------------ + + def forward_extend( + self, + layer: "RadixLinearAttention", + forward_batch: "ForwardBatch", + 
mixed_qkv: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + ) -> torch.Tensor: + """GDN extend (prefill): multi-token per request. + + Steps: + 1. Gather conv_state from pool for each request + 2. Per-request causal conv1d + 3. Scatter new conv_state back to pool + 4. SiLU → split q,k,v → gating + 5. FlashInfer chunk_gated_delta_rule (or PyTorch fallback) + 6. Scatter final recurrent state back to pool + """ + metadata = self.forward_metadata + cache_indices = metadata.cache_indices + cu_seqlens = metadata.cu_seqlens + gdn_idx = layer.gdn_layer_idx + total_tokens = mixed_qkv.shape[0] + + recurrent_buf, conv_buf = self.gdn_pool.get_layer_state(gdn_idx) + conv_weight = layer.conv_weight # [conv_dim, kernel_size] + K = conv_weight.shape[1] + batch_size = cache_indices.shape[0] + + key_dim = layer.num_k_heads * layer.head_k_dim + value_dim = layer.num_v_heads * layer.head_v_dim + + # --- Per-request causal conv1d --- + conv_out = torch.empty_like(mixed_qkv) # [total_tokens, conv_dim] + + for i in range(batch_size): + start = int(cu_seqlens[i].item()) + end = int(cu_seqlens[i + 1].item()) + seq_len = end - start + if seq_len == 0: + continue + + idx = cache_indices[i] + x = mixed_qkv[start:end] # [seq_len, conv_dim] + prev_state = conv_buf[idx] # [conv_dim, K-1] + + # Pad with previous conv state + x_padded = torch.cat([prev_state.T, x], dim=0) # [K-1+seq_len, conv_dim] + + # Save new conv state (last K-1 tokens) + conv_buf[idx] = x_padded[-(K - 1):].T.clone() + + # Causal conv1d + out = torch.zeros(seq_len, x.shape[1], device=x.device, dtype=x.dtype) + for kk in range(K): + out += x_padded[kk: kk + seq_len] * conv_weight[:, kk] + conv_out[start:end] = out + + # --- SiLU activation --- + conv_out = F.silu(conv_out) + + # --- Split q, k, v --- + q, k, v = conv_out.split([key_dim, key_dim, value_dim], dim=-1) + q = q.view(total_tokens, layer.num_k_heads, layer.head_k_dim) + k = k.view(total_tokens, layer.num_k_heads, layer.head_k_dim) + v = v.view(total_tokens, 
layer.num_v_heads, layer.head_v_dim) + + # --- GDN gating --- + g, beta = _gdn_gating(a, b, layer.A_log, layer.dt_bias) + + # --- Recurrent computation --- + use_fi, fi_prefill, _ = _get_flashinfer_gdn() + use_fi_extend = use_fi and fi_prefill is not None and mixed_qkv.is_cuda + + if not self._extend_backend_logged: + logger.info( + "GDNAttnBackend: [extend] using backend=%s", + "flashinfer" if use_fi_extend else "pytorch", + ) + self._extend_backend_logged = True + + if use_fi_extend: + # Gather initial states for this batch + init_state = recurrent_buf[cache_indices].to(torch.float32) + # [batch_size, num_v_heads, head_v_dim, head_k_dim] + + alpha = torch.exp(g.to(torch.float32)) + beta_f32 = beta.to(torch.float32) + + # FlashInfer's use_qk_l2norm_in_kernel is silently ignored — + # the flag is declared in the Python wrapper but never forwarded + # to the CUDA kernel. Pre-normalize q and k here, matching + # sglang's approach (l2norm_fwd before calling with False). + q_fi = q / (q.norm(dim=-1, keepdim=True) + 1e-6) + k_fi = k / (k.norm(dim=-1, keepdim=True) + 1e-6) + + output, final_state = fi_prefill( + q=q_fi.contiguous(), + k=k_fi.contiguous(), + v=v.contiguous(), + g=alpha, + beta=beta_f32, + initial_state=init_state, + output_final_state=True, + cu_seqlens=cu_seqlens, + use_qk_l2norm_in_kernel=False, + ) + + # Scatter final states back to pool + recurrent_buf[cache_indices] = final_state.to(recurrent_buf.dtype) + else: + # PyTorch fallback: per-request sequential scan + output = self._extend_pytorch_fallback( + q, k, v, g, beta, recurrent_buf, cache_indices, cu_seqlens, layer + ) + + return output.reshape(total_tokens, value_dim) + + def _extend_pytorch_fallback( + self, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + beta: torch.Tensor, + recurrent_buf: torch.Tensor, + cache_indices: torch.Tensor, + cu_seqlens: torch.Tensor, + layer: "RadixLinearAttention", + ) -> torch.Tensor: + """Pure PyTorch extend fallback for GDN with 
delta rule and L2 norm.""" + total_tokens = q.shape[0] + num_v_heads = layer.num_v_heads + num_k_heads = layer.num_k_heads + head_v_dim = layer.head_v_dim + batch_size = cache_indices.shape[0] + + # All computation in float32 + orig_dtype = q.dtype + q = q.float() + k = k.float() + v = v.float() + + # L2 normalize q and k per-head + q = q / (q.norm(dim=-1, keepdim=True) + 1e-6) + k = k / (k.norm(dim=-1, keepdim=True) + 1e-6) + + # GQA expansion + if num_k_heads != num_v_heads: + repeats = num_v_heads // num_k_heads + q = q.repeat_interleave(repeats, dim=1) + k = k.repeat_interleave(repeats, dim=1) + + output = torch.zeros( + total_tokens, num_v_heads, head_v_dim, + device=q.device, dtype=torch.float32, + ) + + for i in range(batch_size): + start = int(cu_seqlens[i].item()) + end = int(cu_seqlens[i + 1].item()) + seq_len = end - start + if seq_len == 0: + continue + + idx = cache_indices[i] + q_seq = q[start:end] + k_seq = k[start:end] + v_seq = v[start:end] + g_seq = g[start:end] + beta_seq = beta[start:end] + + decay = torch.exp(g_seq.float()) # [seq_len, H] + beta_f = beta_seq.float() # [seq_len, H] + state = recurrent_buf[idx].clone() # [H, V, K] float32 + + seq_outputs = [] + for t in range(seq_len): + # Decay + state = state * decay[t].unsqueeze(-1).unsqueeze(-1) + + k_t = k_seq[t] # [H, K] + v_t = v_seq[t] # [H, V] + b_t = beta_f[t] # [H] + q_t = q_seq[t] # [H, K] + + # Delta rule: v_delta = v - state @ k + v_delta = v_t - torch.bmm(state, k_t.unsqueeze(-1)).squeeze(-1) + v_delta = v_delta * b_t.unsqueeze(-1) + + # State update + state = state + v_delta.unsqueeze(-1) * k_t.unsqueeze(-2) + + # Output + o_t = torch.bmm(state, q_t.unsqueeze(-1)).squeeze(-1) + seq_outputs.append(o_t) + + recurrent_buf[idx] = state + output[start:end] = torch.stack(seq_outputs, dim=0) + + return output.to(orig_dtype) + + # ------------------------------------------------------------------ + # Dispatch entry point + # 
------------------------------------------------------------------ + + def forward_gdn( + self, + layer: "RadixLinearAttention", + forward_batch: "ForwardBatch", + mixed_qkv: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + ) -> torch.Tensor: + """Route to decode or extend based on forward mode.""" + if forward_batch.forward_mode.is_decode(): + return self.forward_decode(layer, forward_batch, mixed_qkv, a, b) + else: + return self.forward_extend(layer, forward_batch, mixed_qkv, a, b) diff --git a/pymllm/layers/attention/hybrid_backend.py b/pymllm/layers/attention/hybrid_backend.py new file mode 100644 index 00000000..a5628259 --- /dev/null +++ b/pymllm/layers/attention/hybrid_backend.py @@ -0,0 +1,184 @@ +"""Hybrid attention backend -- FlashInfer + GDN for hybrid architectures. + +Wraps a :class:`FlashInferAttnBackend` (for full-attention layers) and a +:class:`GDNAttnBackend` (for GDN linear-attention layers). Dispatches +based on layer type: + +* ``RadixAttention`` calls → delegated to ``full_attn_backend`` +* ``RadixLinearAttention`` calls (via ``forward_gdn``) → delegated to ``gdn_backend`` + +CUDA-graph compatible: delegates all graph lifecycle methods to both +sub-backends. +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Optional, Set + +import torch + +from pymllm.layers.attention.attention_backend import AttentionBackend + +if TYPE_CHECKING: + from pymllm.engine.forward_batch import ForwardBatch, ForwardMode + from pymllm.layers.attention.flashinfer_backend import FlashInferAttnBackend + from pymllm.layers.attention.gdn_backend import GDNAttnBackend + from pymllm.layers.attention.radix_attention import RadixAttention + from pymllm.layers.attention.radix_linear_attention import RadixLinearAttention + +logger = logging.getLogger(__name__) + + +class HybridAttnBackend(AttentionBackend): + """Composite attention backend for hybrid full-attention + GDN models. 
+ + Parameters + ---------- + full_attn_backend + FlashInfer backend for standard transformer attention layers. + gdn_backend + GDN backend for linear-attention layers. + full_attn_layer_ids + Set of global layer IDs that use full attention (for logging). + """ + + def __init__( + self, + full_attn_backend: "FlashInferAttnBackend", + gdn_backend: "GDNAttnBackend", + full_attn_layer_ids: Set[int], + ): + self.full_attn_backend = full_attn_backend + self.gdn_backend = gdn_backend + self.full_attn_layer_ids = full_attn_layer_ids + + logger.info( + "HybridAttnBackend created: %d full-attn layers, " + "%d GDN layers", + len(full_attn_layer_ids), + gdn_backend.gdn_pool.num_gdn_layers, + ) + + # ------------------------------------------------------------------ + # Core interface: init_forward_metadata + # ------------------------------------------------------------------ + + def init_forward_metadata(self, forward_batch: "ForwardBatch") -> None: + """Initialize metadata for both sub-backends.""" + self.full_attn_backend.init_forward_metadata(forward_batch) + self.gdn_backend.init_forward_metadata(forward_batch) + + # ------------------------------------------------------------------ + # Full attention: forward_decode / forward_extend + # ------------------------------------------------------------------ + + def forward_decode( + self, + q: torch.Tensor, + k: Optional[torch.Tensor], + v: Optional[torch.Tensor], + layer: "RadixAttention", + forward_batch: "ForwardBatch", + save_kv_cache: bool = True, + **kwargs, + ) -> torch.Tensor: + """Delegate full-attention decode to FlashInfer backend.""" + return self.full_attn_backend.forward_decode( + q, k, v, layer, forward_batch, save_kv_cache=save_kv_cache, **kwargs + ) + + def forward_extend( + self, + q: torch.Tensor, + k: Optional[torch.Tensor], + v: Optional[torch.Tensor], + layer: "RadixAttention", + forward_batch: "ForwardBatch", + save_kv_cache: bool = True, + **kwargs, + ) -> torch.Tensor: + """Delegate full-attention 
extend to FlashInfer backend.""" + return self.full_attn_backend.forward_extend( + q, k, v, layer, forward_batch, save_kv_cache=save_kv_cache, **kwargs + ) + + # ------------------------------------------------------------------ + # GDN linear attention: forward_gdn + # ------------------------------------------------------------------ + + def forward_gdn( + self, + layer: "RadixLinearAttention", + forward_batch: "ForwardBatch", + mixed_qkv: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + ) -> torch.Tensor: + """Delegate GDN computation to the GDN backend.""" + return self.gdn_backend.forward_gdn( + layer=layer, + forward_batch=forward_batch, + mixed_qkv=mixed_qkv, + a=a, + b=b, + ) + + # ------------------------------------------------------------------ + # CUDA-graph interface: delegate to both sub-backends + # ------------------------------------------------------------------ + + def get_cuda_graph_seq_len_fill_value(self) -> int: + """Delegate to the full-attention backend.""" + return self.full_attn_backend.get_cuda_graph_seq_len_fill_value() + + def init_cuda_graph_state(self, max_bs: int, max_num_tokens: int) -> None: + """Allocate CUDA-graph state for both sub-backends.""" + self.full_attn_backend.init_cuda_graph_state(max_bs, max_num_tokens) + self.gdn_backend.init_cuda_graph_state(max_bs, max_num_tokens) + + def init_forward_metadata_capture_cuda_graph( + self, + bs: int, + num_tokens: int, + req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + forward_mode: "ForwardMode", + ) -> None: + """Set up metadata for CUDA-graph capture in both sub-backends.""" + self.full_attn_backend.init_forward_metadata_capture_cuda_graph( + bs=bs, + num_tokens=num_tokens, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + forward_mode=forward_mode, + ) + self.gdn_backend.init_forward_metadata_capture_cuda_graph( + bs=bs, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + ) + + def init_forward_metadata_replay_cuda_graph( + self, + bs: int, + 
req_pool_indices: torch.Tensor, + seq_lens: torch.Tensor, + seq_lens_sum: int, + forward_mode: "ForwardMode", + seq_lens_cpu: Optional[torch.Tensor], + ) -> None: + """Update metadata for CUDA-graph replay in both sub-backends.""" + self.full_attn_backend.init_forward_metadata_replay_cuda_graph( + bs=bs, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + seq_lens_sum=seq_lens_sum, + forward_mode=forward_mode, + seq_lens_cpu=seq_lens_cpu, + ) + self.gdn_backend.init_forward_metadata_replay_cuda_graph( + bs=bs, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens, + ) diff --git a/pymllm/layers/attention/radix_linear_attention.py b/pymllm/layers/attention/radix_linear_attention.py new file mode 100644 index 00000000..01993163 --- /dev/null +++ b/pymllm/layers/attention/radix_linear_attention.py @@ -0,0 +1,116 @@ +"""RadixLinearAttention -- GDN linear-attention layer for hybrid models. + +Analogous to :class:`RadixAttention` but for GDN (Gated Delta Net) layers. +Stores per-layer GDN parameters and delegates computation to the +:meth:`AttentionBackend.forward_gdn` method on the current +:class:`~pymllm.engine.forward_batch.ForwardBatch`. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import torch +from torch import nn + +if TYPE_CHECKING: + from pymllm.engine.forward_batch import ForwardBatch + + +class RadixLinearAttention(nn.Module): + """GDN linear-attention layer that delegates to the attention backend. + + Each GDN layer in a pymllm model creates one ``RadixLinearAttention`` + with a unique ``layer_id`` and ``gdn_layer_idx``. During forward, it + calls ``forward_batch.attn_backend.forward_gdn(...)`` which routes to + the appropriate GDN backend implementation. + + Parameters + ---------- + layer_id : int + Global zero-based layer index within the model. + gdn_layer_idx : int + Sequential zero-based index among GDN layers only (not global). + Used to index into :class:`~pymllm.mem_cache.memory_pool.GDNPool`. 
+ num_k_heads : int + Number of key heads. + num_v_heads : int + Number of value heads. + head_k_dim : int + Per-head key dimension. + head_v_dim : int + Per-head value dimension. + conv_weight : nn.Parameter + Reference to the GDNConv1d weight parameter. + A_log : nn.Parameter + Log-space decay parameter. + dt_bias : nn.Parameter + Bias for the decay gate. + """ + + def __init__( + self, + layer_id: int, + gdn_layer_idx: int, + num_k_heads: int, + num_v_heads: int, + head_k_dim: int, + head_v_dim: int, + conv_weight: nn.Parameter, + A_log: nn.Parameter, + dt_bias: nn.Parameter, + ): + super().__init__() + self.layer_id = layer_id + self.gdn_layer_idx = gdn_layer_idx + self.num_k_heads = num_k_heads + self.num_v_heads = num_v_heads + self.head_k_dim = head_k_dim + self.head_v_dim = head_v_dim + # Store references to model parameters (not copies) + self.conv_weight = conv_weight + self.A_log = A_log + self.dt_bias = dt_bias + + def forward( + self, + forward_batch: "ForwardBatch", + mixed_qkv: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + ) -> torch.Tensor: + """Delegate GDN computation to the attention backend. + + Parameters + ---------- + forward_batch + Batch metadata with ``attn_backend`` attached. + mixed_qkv + Concatenated Q/K/V projection output before conv1d. + a + Decay gate input, shape ``[num_tokens, num_v_heads]``. + b + Update gate input, shape ``[num_tokens, num_v_heads]``. + + Returns + ------- + torch.Tensor + GDN attention output, shape ``[num_tokens, num_v_heads * head_v_dim]``. 
+ """ + return forward_batch.attn_backend.forward_gdn( + layer=self, + forward_batch=forward_batch, + mixed_qkv=mixed_qkv, + a=a, + b=b, + ) + + def extra_repr(self) -> str: + return ( + f"layer_id={self.layer_id}, " + f"gdn_layer_idx={self.gdn_layer_idx}, " + f"k_heads={self.num_k_heads}, " + f"v_heads={self.num_v_heads}, " + f"k_dim={self.head_k_dim}, " + f"v_dim={self.head_v_dim}" + ) diff --git a/pymllm/layers/gated_delta_net.py b/pymllm/layers/gated_delta_net.py new file mode 100644 index 00000000..3753734d --- /dev/null +++ b/pymllm/layers/gated_delta_net.py @@ -0,0 +1,168 @@ +"""Gated Delta Network (GDN) linear attention for Qwen3.5. + +This implements the linear attention mechanism used in Qwen3.5's hybrid +architecture. GDN alternates with standard full-attention layers. + +Core formulation (decode, per-head): + g_t = -exp(A_log) * softplus(a_t + dt_bias) + beta_t = sigmoid(b_t) + state_t = exp(g_t) * state_{t-1} + beta_t * (k_t outer v_t) + output_t = (q_t @ state_t) + +State is externalized into a :class:`~pymllm.mem_cache.memory_pool.GDNPool` +and computation is delegated to the attention backend via +:class:`~pymllm.layers.attention.radix_linear_attention.RadixLinearAttention`. +""" + +from __future__ import annotations + +import logging +from typing import Any, Optional + +import torch +import torch.nn as nn + +from pymllm.layers.base import MllmBaseLayer +from pymllm.layers.linear import Linear +from pymllm.layers.utils import set_weight_attrs + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Conv1d weight holder +# --------------------------------------------------------------------------- + + +class GDNConv1d(nn.Module): + """Causal 1D convolution weight holder for GDN sequence mixing. + + The actual convolution computation is performed by the GDN backend + using pooled conv states. This module only holds the learnable weight. 
+ """ + + def __init__(self, channels: int, kernel_size: int): + super().__init__() + self.channels = channels + self.kernel_size = kernel_size + self.weight = nn.Parameter(torch.empty(channels, kernel_size)) + + +# --------------------------------------------------------------------------- +# GatedDeltaNet — main GDN layer +# --------------------------------------------------------------------------- + + +class GatedDeltaNet(MllmBaseLayer): + """Gated Delta Network linear attention layer for Qwen3.5. + + State is externalized into a GDNPool and computation is delegated to + the attention backend via RadixLinearAttention. + + Parameters + ---------- + hidden_size : int + Model hidden dimension. + num_k_heads : int + Number of key heads. + num_v_heads : int + Number of value heads. + head_k_dim : int + Per-head key dimension. + head_v_dim : int + Per-head value dimension. + conv_kernel_size : int + Causal conv1d kernel width. + layer_id : int + Global layer index. + gdn_layer_idx : int + Sequential index among GDN layers (0-based). + rms_norm_eps : float + Epsilon for gated RMS normalization. 
+ """ + + def __init__( + self, + hidden_size: int, + num_k_heads: int = 16, + num_v_heads: int = 32, + head_k_dim: int = 128, + head_v_dim: int = 128, + conv_kernel_size: int = 4, + layer_id: int = 0, + gdn_layer_idx: int = 0, + rms_norm_eps: float = 1e-6, + ): + super().__init__() + self.hidden_size = hidden_size + self.num_k_heads = num_k_heads + self.num_v_heads = num_v_heads + self.head_k_dim = head_k_dim + self.head_v_dim = head_v_dim + self.key_dim = head_k_dim * num_k_heads + self.value_dim = head_v_dim * num_v_heads + self.conv_kernel_size = conv_kernel_size + self.layer_id = layer_id + self.gdn_layer_idx = gdn_layer_idx + + # Input projections + self.in_proj_qkv = Linear(hidden_size, self.key_dim * 2 + self.value_dim, bias=False) + self.in_proj_z = Linear(hidden_size, self.value_dim, bias=False) + self.in_proj_a = Linear(hidden_size, num_v_heads, bias=False) + self.in_proj_b = Linear(hidden_size, num_v_heads, bias=False) + + # Causal convolution (weight only — computation is in the backend) + self.conv1d = GDNConv1d(self.key_dim * 2 + self.value_dim, conv_kernel_size) + + # State parameters (must stay float32 for numerical stability) + self.A_log = nn.Parameter(torch.empty(num_v_heads, dtype=torch.float32)) + self.dt_bias = nn.Parameter(torch.ones(num_v_heads, dtype=torch.float32)) + set_weight_attrs(self.A_log, {"weight_loader": self.weight_loader}) + set_weight_attrs(self.dt_bias, {"weight_loader": self.weight_loader}) + + # Gated RMSNorm (mllm-kernel accelerated) + from pymllm.layers.rms_norm_gated import RMSNormGated + self.norm = RMSNormGated(head_v_dim, eps=rms_norm_eps, norm_before_gate=True) + + # Output projection + self.out_proj = Linear(self.value_dim, hidden_size, bias=False) + + # RadixLinearAttention — delegates to the attention backend + from pymllm.layers.attention.radix_linear_attention import RadixLinearAttention + self.attn = RadixLinearAttention( + layer_id=layer_id, + gdn_layer_idx=gdn_layer_idx, + num_k_heads=num_k_heads, + 
num_v_heads=num_v_heads, + head_k_dim=head_k_dim, + head_v_dim=head_v_dim, + conv_weight=self.conv1d.weight, + A_log=self.A_log, + dt_bias=self.dt_bias, + ) + + def forward( + self, hidden_states: torch.Tensor, forward_batch: Any = None, + ) -> torch.Tensor: + seq_len, _ = hidden_states.shape + + # Input projections + mixed_qkv = self.in_proj_qkv(hidden_states) + z = self.in_proj_z(hidden_states) + a = self.in_proj_a(hidden_states) + b = self.in_proj_b(hidden_states) + + # Delegate to backend via RadixLinearAttention + # The backend handles: conv1d, SiLU, split, gating, recurrent update + attn_out = self.attn(forward_batch, mixed_qkv, a, b) + + # Gated norm + output projection + attn_out = attn_out.view(seq_len, self.num_v_heads, self.head_v_dim) + z = z.view(seq_len, self.num_v_heads, self.head_v_dim) + + attn_flat = attn_out.reshape(-1, self.head_v_dim) + z_flat = z.reshape(-1, self.head_v_dim) + normed = self.norm(attn_flat, z_flat) + normed = normed.view(seq_len, self.num_v_heads, self.head_v_dim) + normed = normed.reshape(seq_len, self.value_dim) + return self.out_proj(normed) diff --git a/pymllm/layers/rms_norm.py b/pymllm/layers/rms_norm.py index b55a0ea6..b20b36f3 100644 --- a/pymllm/layers/rms_norm.py +++ b/pymllm/layers/rms_norm.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Optional, Tuple, Union + import torch import flashinfer from torch.nn import Parameter @@ -19,7 +21,15 @@ def __init__(self, hidden_size: int, eps: float = 1e-6): self.weight = Parameter(torch.empty(hidden_size)) set_weight_attrs(self.weight, {"weight_loader": self.weight_loader}) - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward( + self, + x: torch.Tensor, + residual: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + if residual is not None: + flashinfer.norm.fused_add_rmsnorm(x, residual, self.weight.data, self.eps) + return x, residual + if x.shape[-1] != self.hidden_size: raise ValueError( 
f"Expected last dim == hidden_size ({self.hidden_size}), " @@ -47,7 +57,17 @@ def __init__(self, hidden_size: int, eps: float = 1e-6): self.weight = Parameter(torch.empty(hidden_size)) set_weight_attrs(self.weight, {"weight_loader": self.weight_loader}) - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward( + self, + x: torch.Tensor, + residual: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: + if residual is not None: + flashinfer.norm.gemma_fused_add_rmsnorm( + x, residual, self.weight.data, self.eps + ) + return x, residual + if x.shape[-1] != self.hidden_size: raise ValueError( f"Expected last dim == hidden_size ({self.hidden_size}), " diff --git a/pymllm/layers/rms_norm_gated.py b/pymllm/layers/rms_norm_gated.py new file mode 100644 index 00000000..caec9b88 --- /dev/null +++ b/pymllm/layers/rms_norm_gated.py @@ -0,0 +1,154 @@ +"""Gated RMSNorm layer for Qwen3.5 GDN attention. + +Computes ``rmsnorm(x, weight, eps) * silu(z)`` using a fused CUDA kernel +from mllm-kernel. Falls back to PyTorch when the kernel is unavailable. 
+""" + +from __future__ import annotations + +import logging +from typing import Optional + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import Parameter + +from pymllm.layers.base import MllmBaseLayer +from pymllm.layers.utils import set_weight_attrs + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Try to load the mllm-kernel fused CUDA implementation +# --------------------------------------------------------------------------- +_HAS_MLLM_KERNEL_CUDA = False +try: + from mllm_kernel.cuda.jit.rms_norm_gated import ( + rms_norm_gated as _mllm_rms_norm_gated, + ) + + _HAS_MLLM_KERNEL_CUDA = True +except Exception: + _mllm_rms_norm_gated = None + + +# --------------------------------------------------------------------------- +# Pure-PyTorch fallback +# --------------------------------------------------------------------------- + + +def _rms_norm_gated_pytorch( + x: torch.Tensor, + weight: torch.Tensor, + z: Optional[torch.Tensor] = None, + eps: float = 1e-6, + norm_before_gate: bool = True, +) -> torch.Tensor: + """Pure-PyTorch reference implementation.""" + dtype = x.dtype + x_fp32 = x.float() + w_fp32 = weight.float() + z_fp32 = z.float() if z is not None else None + + if z_fp32 is not None and not norm_before_gate: + x_fp32 = x_fp32 * F.silu(z_fp32) + + variance = x_fp32.pow(2).mean(dim=-1, keepdim=True) + rstd = torch.rsqrt(variance + eps) + out = x_fp32 * rstd * w_fp32 + + if z_fp32 is not None and norm_before_gate: + out = out * F.silu(z_fp32) + + return out.to(dtype) + + +# --------------------------------------------------------------------------- +# Unified dispatch +# --------------------------------------------------------------------------- + + +def rms_norm_gated( + x: torch.Tensor, + weight: torch.Tensor, + z: Optional[torch.Tensor] = None, + eps: float = 1e-6, + norm_before_gate: bool = True, +) -> torch.Tensor: + """Compute (optionally 
gated) RMS normalization. + + Uses the fused mllm-kernel CUDA implementation when available, + otherwise falls back to a pure-PyTorch implementation. + """ + if _HAS_MLLM_KERNEL_CUDA and x.is_cuda: + return _mllm_rms_norm_gated(x, weight, z=z, eps=eps) + return _rms_norm_gated_pytorch( + x, weight, z=z, eps=eps, norm_before_gate=norm_before_gate, + ) + + +# --------------------------------------------------------------------------- +# nn.Module wrapper +# --------------------------------------------------------------------------- + + +class RMSNormGated(MllmBaseLayer): + """Gated RMS Normalization layer for Qwen3.5 GDN attention. + + Computes:: + + output = rmsnorm(x, weight) * silu(z) # z is not None + output = rmsnorm(x, weight) # z is None + + Uses a fused CUDA kernel from mllm-kernel for maximum throughput. + + Parameters + ---------- + hidden_size : int + Dimensionality of the input (and weight vector). + eps : float + Small constant for numerical stability. + norm_before_gate : bool + If ``True`` (default): ``rmsnorm(x) * silu(z)``. + If ``False``: ``rmsnorm(x * silu(z))``. 
+ """ + + def __init__( + self, + hidden_size: int, + eps: float = 1e-6, + group_size: Optional[int] = None, + norm_before_gate: bool = True, + device: Optional[torch.device] = None, + dtype: Optional[torch.dtype] = None, + ): + super().__init__() + self.hidden_size = hidden_size + self.eps = eps + self.norm_before_gate = norm_before_gate + + factory_kwargs = {} + if device is not None: + factory_kwargs["device"] = device + if dtype is not None: + factory_kwargs["dtype"] = dtype + + self.weight = Parameter(torch.ones(hidden_size, **factory_kwargs)) + set_weight_attrs(self.weight, {"weight_loader": self.weight_loader}) + + def forward( + self, + x: torch.Tensor, + z: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + return rms_norm_gated( + x, self.weight, z=z, eps=self.eps, + norm_before_gate=self.norm_before_gate, + ) + + def extra_repr(self) -> str: + return ( + f"hidden_size={self.hidden_size}, eps={self.eps}, " + f"norm_before_gate={self.norm_before_gate}" + ) diff --git a/pymllm/layers/rope.py b/pymllm/layers/rope.py index 045774e9..94f89b20 100644 --- a/pymllm/layers/rope.py +++ b/pymllm/layers/rope.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Optional, Tuple +from typing import List, Optional, Tuple import torch import flashinfer @@ -44,7 +44,10 @@ def apply_rope( """ if inplace: flashinfer.rope.apply_rope_inplace( - q, k, indptr, offsets, + q, + k, + indptr, + offsets, rotary_dim=rotary_dim, interleave=interleave, rope_scale=rope_scale, @@ -53,7 +56,10 @@ def apply_rope( return None return flashinfer.rope.apply_rope( - q, k, indptr, offsets, + q, + k, + indptr, + offsets, rotary_dim=rotary_dim, interleave=interleave, rope_scale=rope_scale, @@ -102,7 +108,10 @@ def apply_llama31_rope( """ if inplace: flashinfer.rope.apply_llama31_rope_inplace( - q, k, indptr, offsets, + q, + k, + indptr, + offsets, rotary_dim=rotary_dim, interleave=interleave, rope_scale=rope_scale, @@ -114,7 +123,10 @@ def apply_llama31_rope( return None 
return flashinfer.rope.apply_llama31_rope( - q, k, indptr, offsets, + q, + k, + indptr, + offsets, rotary_dim=rotary_dim, interleave=interleave, rope_scale=rope_scale, @@ -156,7 +168,9 @@ def apply_rope_pos_ids( """ if inplace: flashinfer.rope.apply_rope_pos_ids_inplace( - q, k, pos_ids, + q, + k, + pos_ids, rotary_dim=rotary_dim, interleave=interleave, rope_scale=rope_scale, @@ -165,7 +179,9 @@ def apply_rope_pos_ids( return None return flashinfer.rope.apply_rope_pos_ids( - q, k, pos_ids, + q, + k, + pos_ids, rotary_dim=rotary_dim, interleave=interleave, rope_scale=rope_scale, @@ -208,7 +224,9 @@ def apply_llama31_rope_pos_ids( """ if inplace: flashinfer.rope.apply_llama31_rope_pos_ids_inplace( - q, k, pos_ids, + q, + k, + pos_ids, rotary_dim=rotary_dim, interleave=interleave, rope_scale=rope_scale, @@ -220,7 +238,9 @@ def apply_llama31_rope_pos_ids( return None return flashinfer.rope.apply_llama31_rope_pos_ids( - q, k, pos_ids, + q, + k, + pos_ids, rotary_dim=rotary_dim, interleave=interleave, rope_scale=rope_scale, @@ -265,12 +285,117 @@ def apply_rope_with_cos_sin_cache( """ if inplace: flashinfer.rope.apply_rope_with_cos_sin_cache_inplace( - positions, query, key, head_size, cos_sin_cache, + positions, + query, + key, + head_size, + cos_sin_cache, is_neox=is_neox, ) return None return flashinfer.rope.apply_rope_with_cos_sin_cache( - positions, query, key, head_size, cos_sin_cache, + positions, + query, + key, + head_size, + cos_sin_cache, is_neox=is_neox, ) + + +def _rotate_half(x: torch.Tensor) -> torch.Tensor: + """Rotate the second half of the last dimension into the first half (neox-style).""" + half = x.shape[-1] // 2 + return torch.cat((-x[..., half:], x[..., :half]), dim=-1) + + +def apply_mrope( + q: torch.Tensor, + k: torch.Tensor, + positions: torch.Tensor, + cos_sin_cache: torch.Tensor, + mrope_section: List[int], + mrope_interleaved: bool = True, +) -> Tuple[torch.Tensor, torch.Tensor]: + """Apply multi-dimensional rotary position embedding 
(M-RoPE). + + Used by Qwen3-VL which assigns independent (t, h, w) position indices to + each token. For text tokens all three indices are the same sequential + value; for image tokens they follow the spatial grid layout. + + Args: + q: Query tensor, shape ``(T, num_q_heads, head_dim)``. + k: Key tensor, shape ``(T, num_kv_heads, head_dim)``. + positions: 3-D position IDs, shape ``(3, T)`` — rows are + ``(temporal, height, width)`` position indices. + cos_sin_cache: Precomputed cache, shape ``(max_pos, head_dim)``. + The first ``head_dim // 2`` columns are cosine values and the + remaining columns are sine values, each for frequencies + ``0, 1, ..., head_dim // 2 - 1``. + mrope_section: Three integers ``[s_t, s_h, s_w]`` that partition + the ``head_dim // 2`` rotary frequency dimensions among the + temporal, height, and width components. + ``sum(mrope_section)`` must equal ``head_dim // 2``. + mrope_interleaved: When ``True`` (Qwen3-VL default), uses the + interleaved layout where frequency dimensions are cycled + ``(t, h, w, t, h, w, ...)`` rather than grouped consecutively. + + Returns: + ``(q_rope, k_rope)`` with the same shapes as the inputs. + """ + rotary_dim = cos_sin_cache.shape[-1] # = head_dim + half_dim = rotary_dim // 2 + + # Look up cos/sin for each of the 3 position dimensions. + # positions: [3, T] => cos_sin: [3, T, rotary_dim] + cos_sin = cos_sin_cache[positions] + cos = cos_sin[..., :half_dim] # [3, T, half_dim] + sin = cos_sin[..., half_dim:] # [3, T, half_dim] + + if mrope_interleaved: + # Interleaved layout (Qwen3-VL): within the first + # mrope_section[1]*3 frequency dims, indices cycle (t, h, w). + # Remaining dims (indices >= span) all use the temporal position. + # Matches SGLang's apply_interleaved_rope. 
+ cos_merged = cos[0].clone() # start with temporal; shape [T, half_dim] + sin_merged = sin[0].clone() + span_h = mrope_section[1] * 3 + span_w = mrope_section[2] * 3 + cos_merged[..., 1:span_h:3] = cos[1, ..., 1:span_h:3] + cos_merged[..., 2:span_w:3] = cos[2, ..., 2:span_w:3] + sin_merged[..., 1:span_h:3] = sin[1, ..., 1:span_h:3] + sin_merged[..., 2:span_w:3] = sin[2, ..., 2:span_w:3] + else: + # Non-interleaved (Qwen2-VL style): consecutive frequency sections. + cos_sects = cos.split(mrope_section, dim=-1) # list of [T, s_i] + sin_sects = sin.split(mrope_section, dim=-1) + # Section i picks its cos/sin from positions[i] + cos_merged = torch.cat( + [cos_sects[i][i] for i in range(3)], dim=-1 + ) # [T, half_dim] + sin_merged = torch.cat( + [sin_sects[i][i] for i in range(3)], dim=-1 + ) # [T, half_dim] + + # Expand to full rotary_dim for the neox-style rotation formula: + # q_rot = q * cos_full + rotate_half(q) * sin_full + cos_full = cos_merged.repeat(1, 2) # [T, rotary_dim] + sin_full = sin_merged.repeat(1, 2) # [T, rotary_dim] + cos_4d = cos_full.unsqueeze(1) # [T, 1, rotary_dim] -- broadcasts over heads + sin_4d = sin_full.unsqueeze(1) + + q_rot = q[..., :rotary_dim] * cos_4d + _rotate_half(q[..., :rotary_dim]) * sin_4d + k_rot = k[..., :rotary_dim] * cos_4d + _rotate_half(k[..., :rotary_dim]) * sin_4d + + q_out = ( + torch.cat([q_rot, q[..., rotary_dim:]], dim=-1) + if rotary_dim < q.shape[-1] + else q_rot + ) + k_out = ( + torch.cat([k_rot, k[..., rotary_dim:]], dim=-1) + if rotary_dim < k.shape[-1] + else k_rot + ) + return q_out, k_out diff --git a/pymllm/layers/sampling.py b/pymllm/layers/sampling.py index ff84879c..26c769ff 100644 --- a/pymllm/layers/sampling.py +++ b/pymllm/layers/sampling.py @@ -74,6 +74,15 @@ def softmax( torch.Tensor Probabilities with the same shape as *logits*. """ + # Clamp temperature to avoid division by zero (temperature=0 → greedy). 
+ # Replace 0 with 1 here; the caller (ModelRunner.sample) handles + # temperature=0 via argmax before reaching this path. + if temperature is not None: + if isinstance(temperature, torch.Tensor): + temperature = temperature.clamp(min=1e-6) + elif temperature < 1e-6: + temperature = 1.0 # effectively no scaling; caller uses argmax + if _HAS_FLASHINFER: return _fi_sampling.softmax( logits, temperature=temperature, enable_pdl=enable_pdl diff --git a/pymllm/mem_cache/memory_pool.py b/pymllm/mem_cache/memory_pool.py index f9c176a9..9c8ab2a9 100644 --- a/pymllm/mem_cache/memory_pool.py +++ b/pymllm/mem_cache/memory_pool.py @@ -83,6 +83,10 @@ def __init__( self.device = torch.device(device) self.dtype = dtype + # pin_memory only applies to CPU tensors + if self.device.type != "cpu": + pin_memory = False + buf_len = size + 1 # slot 0 is padding if buf_len % 8 != 0: @@ -472,6 +476,161 @@ def make_full_attention_net_mem_pool( return pool, allocator +class GDNPool: + """Pre-allocated memory pool for GDN recurrent and conv states. + + Indexed by ``req_pool_idx`` (same index space as :class:`ReqToTokenPool`). + Slot 0 is reserved as a padding / dummy slot and is never allocated. + + Layout:: + + recurrent_state[gdn_layer_idx, slot, num_v_heads, head_k_dim, head_v_dim] + float32 (FlashInfer requirement) + conv_state[gdn_layer_idx, slot, conv_dim, kernel_size - 1] + model dtype (bfloat16 / float16) + + Parameters + ---------- + max_reqs : int + Maximum number of concurrent requests (matches ``ReqToTokenPool.size``). + num_gdn_layers : int + Number of GDN (linear attention) layers in the model. + num_v_heads : int + Number of value heads per GDN layer. + head_k_dim : int + Per-head key dimension. + head_v_dim : int + Per-head value dimension. + conv_dim : int + Total convolution input dimension (``key_dim * 2 + value_dim``). + conv_kernel_size : int + Causal conv1d kernel width (state stores ``kernel_size - 1`` columns). + device : str | torch.device + Target device. 
+ dtype : torch.dtype + Storage dtype for conv_state (recurrent_state is always float32). + """ + + def __init__( + self, + max_reqs: int, + num_gdn_layers: int, + num_v_heads: int, + head_k_dim: int, + head_v_dim: int, + conv_dim: int, + conv_kernel_size: int, + device: Union[str, torch.device] = "cuda", + dtype: torch.dtype = torch.bfloat16, + max_track_slots: int = 0, + ): + self.max_reqs = max_reqs + self.num_gdn_layers = num_gdn_layers + self.num_v_heads = num_v_heads + self.head_k_dim = head_k_dim + self.head_v_dim = head_v_dim + self.conv_dim = conv_dim + self.conv_kernel_size = conv_kernel_size + self.device = torch.device(device) + self.dtype = dtype + self.max_track_slots = max_track_slots + + # Track slots live after the working slots: indices + # [max_reqs + 1, max_reqs + 1 + max_track_slots) + pool_size = max_reqs + 1 + max_track_slots # slot 0 is padding + + # Recurrent state: always float32 (FlashInfer requirement) + # Shape: [num_gdn_layers, pool_size, num_v_heads, head_v_dim, head_k_dim] + # Note: FlashInfer uses (V, K) layout for the state matrix + self.recurrent_state = torch.zeros( + (num_gdn_layers, pool_size, num_v_heads, head_v_dim, head_k_dim), + dtype=torch.float32, + device=self.device, + ) + + # Conv state: model dtype + # Shape: [num_gdn_layers, pool_size, conv_dim, kernel_size - 1] + self.conv_state = torch.zeros( + (num_gdn_layers, pool_size, conv_dim, conv_kernel_size - 1), + dtype=dtype, + device=self.device, + ) + + # Track-slot free list (indices into the pool starting after working slots) + self._track_slot_base = max_reqs + 1 + self._free_track_slots: List[int] = list( + range(self._track_slot_base, self._track_slot_base + max_track_slots) + ) + + logger.info( + "GDNPool allocated: %d GDN layers, %d working + %d track slots, " + "v_heads=%d, k_dim=%d, v_dim=%d, conv_dim=%d, kernel=%d, %.2f GB", + num_gdn_layers, + max_reqs, + max_track_slots, + num_v_heads, + head_k_dim, + head_v_dim, + conv_dim, + conv_kernel_size, + 
self.mem_bytes() / (1 << 30), + ) + + def get_layer_state( + self, gdn_layer_idx: int + ) -> Tuple[torch.Tensor, torch.Tensor]: + """Return ``(recurrent_state, conv_state)`` for a specific GDN layer. + + Both are views into the pool tensors with shape: + - recurrent: ``[pool_size, num_v_heads, head_v_dim, head_k_dim]`` + - conv: ``[pool_size, conv_dim, kernel_size - 1]`` + """ + return ( + self.recurrent_state[gdn_layer_idx], + self.conv_state[gdn_layer_idx], + ) + + def reset_states(self, req_pool_indices: torch.Tensor) -> None: + """Zero-init GDN states for the given request pool indices. + + Called when new requests are allocated to ensure clean state. + """ + if req_pool_indices.numel() == 0: + return + # Zero both recurrent and conv states for all GDN layers + self.recurrent_state[:, req_pool_indices] = 0 + self.conv_state[:, req_pool_indices] = 0 + + # ------------------------------------------------------------------ + # Track-slot management (for prefix cache GDN state snapshots) + # ------------------------------------------------------------------ + + def alloc_track_slot(self) -> Optional[int]: + """Allocate a single track slot index. Returns ``None`` if exhausted.""" + if not self._free_track_slots: + return None + return self._free_track_slots.pop() + + def free_track_slot(self, slot: int) -> None: + """Return a track slot to the free list.""" + self._free_track_slots.append(slot) + + def copy_states(self, src_index: int, dst_index: int) -> None: + """Copy recurrent and conv states from *src_index* to *dst_index*. + + Works for any pool indices (working or track slots). 
+ """ + self.recurrent_state[:, dst_index] = self.recurrent_state[:, src_index] + self.conv_state[:, dst_index] = self.conv_state[:, src_index] + + def mem_bytes(self) -> int: + """Total memory consumption in bytes.""" + return ( + self.recurrent_state.nelement() * self.recurrent_state.element_size() + + self.conv_state.nelement() * self.conv_state.element_size() + ) + + def make_req_to_token_pool( max_reqs: int, max_context_len: int, diff --git a/pymllm/mem_cache/radix_cache.py b/pymllm/mem_cache/radix_cache.py index 997790ea..441a8c09 100644 --- a/pymllm/mem_cache/radix_cache.py +++ b/pymllm/mem_cache/radix_cache.py @@ -18,7 +18,7 @@ import time from collections import defaultdict from dataclasses import dataclass -from typing import Any, Dict, Iterator, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union import torch @@ -189,6 +189,7 @@ class InsertResult: """Returned by :meth:`RadixCache.insert`.""" prefix_len: int = 0 + last_node: Optional[TreeNode] = None @dataclass @@ -224,11 +225,13 @@ def __init__( sliding_window_size: Optional[int] = None, disable: bool = False, token_to_kv_pool_allocator: Any = None, + on_node_evict: Optional[Callable[[int], None]] = None, ): self.page_size = page_size self.sliding_window_size = sliding_window_size self.disable = disable self.pool = token_to_kv_pool_allocator + self.on_node_evict = on_node_evict if self.pool is not None and hasattr(self.pool, "device"): self.device = self.pool.device @@ -332,9 +335,10 @@ def insert( plen = self._insert_swa( self.root_node, key, value, prev_prefix_len, swa_evicted_seqlen ) + return InsertResult(prefix_len=plen) else: - plen = self._insert_normal(self.root_node, key, value) - return InsertResult(prefix_len=plen) + plen, last_node = self._insert_normal(self.root_node, key, value) + return InsertResult(prefix_len=plen, last_node=last_node) def evict(self, num_tokens: int, swa_num_tokens: int = 0) -> EvictResult: """Evict up to 
*num_tokens* (full) and *swa_num_tokens* (SWA) tokens. @@ -589,30 +593,38 @@ def _match_swa(self, key: RadixKey) -> Tuple[List[torch.Tensor], TreeNode, int]: return values, best_node, best_count - def _insert_normal(self, node: TreeNode, key: RadixKey, value: torch.Tensor) -> int: + def _insert_normal( + self, node: TreeNode, key: RadixKey, value: torch.Tensor + ) -> Tuple[int, TreeNode]: + """Insert into non-SWA tree. Returns ``(prefix_len, last_node)``.""" now = time.monotonic() node.last_access_time = now if len(key) == 0: - return 0 + return 0, node total_prefix = 0 - while len(key) > 0: - ck = _child_key(key, self.page_size) - if ck not in node.children: - break + ck = _child_key(key, self.page_size) + while len(key) > 0 and ck in node.children: node = node.children[ck] node.last_access_time = now plen = _key_match(node.key, key, self.page_size) - if plen < len(node.key): - self._split_node(node.key, node, plen) total_prefix += plen key = key[plen:] value = value[plen:] + if plen < len(node.key): + # Partial match: split the node. ``node`` must advance to + # the NEW parent so that any remaining key is added as a + # sibling of the tail, not a child of it. 
+ node = self._split_node(node.key, node, plen) + if len(key) > 0: + ck = _child_key(key, self.page_size) + if len(key) > 0: - self._add_leaf(node, key, value) + new_leaf = self._add_leaf(node, key, value) + node = new_leaf - return total_prefix + return total_prefix, node def _insert_swa( self, @@ -730,6 +742,8 @@ def _delete_leaf(self, node: TreeNode) -> None: self._evictable_size -= len(node.key) if self.supports_swa and not node.swa_tombstone: self._swa_evictable_size -= len(node.key) + if self.on_node_evict is not None: + self.on_node_evict(node.id) def _tombstone_node(self, node: TreeNode) -> None: node.swa_tombstone = True diff --git a/pymllm/models/__init__.py b/pymllm/models/__init__.py index e69de29b..7751b309 100644 --- a/pymllm/models/__init__.py +++ b/pymllm/models/__init__.py @@ -0,0 +1,62 @@ +"""Model registry for pymllm. + +Maps HuggingFace ``config.architectures[0]`` strings to pymllm model classes. +Models are imported lazily via ``importlib`` so that heavy dependencies (torch, +numpy, etc.) are only loaded when a model is actually requested. +""" + +from __future__ import annotations + +import importlib +import logging +from typing import Dict, Optional, Tuple, Type + +import torch.nn as nn + +logger = logging.getLogger(__name__) + +# (module_path, class_name) +_MODEL_REGISTRY: Dict[str, Tuple[str, str]] = { + "Qwen3VLForConditionalGeneration": ( + "pymllm.models.qwen3_vl", + "Qwen3VLForConditionalGeneration", + ), + # Qwen3.5 (hybrid attention: full + GDN linear) + "Qwen3_5ForCausalLM": ( + "pymllm.models.qwen3_5", + "Qwen3_5ForCausalLM", + ), + "Qwen3_5ForConditionalGeneration": ( + "pymllm.models.qwen3_5", + "Qwen3_5ForConditionalGeneration", + ), +} + + +def get_model_class(architecture: str) -> Optional[Type[nn.Module]]: + """Look up a pymllm model class by HuggingFace architecture string. + + Returns ``None`` if the architecture is not registered or cannot be + imported. The caller is responsible for raising an appropriate error. 
+ """ + entry = _MODEL_REGISTRY.get(architecture) + if entry is None: + return None + + module_path, class_name = entry + try: + module = importlib.import_module(module_path) + cls = getattr(module, class_name) + logger.info( + "Resolved architecture %r -> %s.%s", architecture, module_path, class_name + ) + return cls + except (ImportError, AttributeError) as exc: + logger.warning( + "Failed to import %s.%s for architecture %r: %s", + module_path, + class_name, + architecture, + exc, + ) + return None diff --git a/pymllm/models/qwen3_5.py b/pymllm/models/qwen3_5.py new file mode 100644 index 00000000..ca4dbe2e --- /dev/null +++ b/pymllm/models/qwen3_5.py @@ -0,0 +1,530 @@ +"""Inference-only Qwen3.5 model for pymllm. + +Implements the hybrid attention architecture: +- **Full attention layers** (standard transformer with RoPE + output gate) +- **GDN linear attention layers** (Gated Delta Network, O(n) complexity) + +Layers alternate: linear, attention, linear, attention, ... based on +``full_attention_interval`` in the config. + +Supports: +- Dense (non-MoE) variant +- Vision-Language (multimodal) via inheritance from Qwen3VL + +Adapted from sglang's ``qwen3_5.py``. 
"""

from __future__ import annotations

import logging
import math
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F

from pymllm.layers.attention.radix_attention import RadixAttention
from pymllm.layers.embedding import VocabParallelEmbedding
from pymllm.layers.gated_delta_net import GatedDeltaNet
from pymllm.layers.linear import Linear
from pymllm.layers.mlp import MLP
from pymllm.layers.rms_norm import GemmaRMSNorm, RMSNorm
from pymllm.layers.rope import apply_rope_pos_ids
from pymllm.layers.utils import set_weight_attrs

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Config helpers
# ---------------------------------------------------------------------------


def _get_text_config(config):
    """Extract the text sub-config from a multimodal config, or return as-is.

    Multimodal configs nest the language-model settings under ``text_config``;
    text-only configs carry them at the top level.
    """
    return getattr(config, "text_config", config)


def _get_layer_types(config) -> List[str]:
    """Return per-layer type list: 'attention' or 'linear_attention'.

    An explicit ``layers_block_type`` list on the config wins; otherwise the
    pattern is derived from ``full_attention_interval``: every
    ``interval``-th layer (1-based) is full attention, the rest are GDN
    linear attention.
    """
    if hasattr(config, "layers_block_type"):
        return config.layers_block_type
    # Compute from full_attention_interval (default: 1 full-attention layer
    # out of every 2).
    interval = getattr(config, "full_attention_interval", 2)
    n_layers = config.num_hidden_layers
    types = []
    for i in range(n_layers):
        # (i + 1) is the 1-based layer position.
        if (i + 1) % interval == 0:
            types.append("attention")
        else:
            types.append("linear_attention")
    return types


# ---------------------------------------------------------------------------
# Full Attention Layer (with output gate + QK norm)
# ---------------------------------------------------------------------------


class Qwen3_5FullAttention(nn.Module):
    """Standard multi-head attention with RoPE, QK-norm, and optional output gate.

    The Q projection is doubled when ``attn_output_gate`` is enabled: the
    first half of each head is the query, the second half is a per-channel
    sigmoid gate applied to the attention output.
    """

    def __init__(self, config, layer_id: int):
        super().__init__()
        tc = _get_text_config(config)
        self.hidden_size = tc.hidden_size
        self.num_heads = tc.num_attention_heads
        self.num_kv_heads = tc.num_key_value_heads
        self.head_dim = getattr(tc, "head_dim", self.hidden_size // self.num_heads)
        self.q_size = self.num_heads * self.head_dim
        self.kv_size = self.num_kv_heads * self.head_dim
        self.scaling = self.head_dim ** -0.5
        self.layer_id = layer_id

        # Output gate: Qwen3.5 doubles the Q projection and uses half as a
        # sigmoid gate on the attention output.
        self.attn_output_gate = getattr(tc, "attn_output_gate", True)

        if self.attn_output_gate:
            q_proj_size = self.q_size * 2  # Q + gate
        else:
            q_proj_size = self.q_size

        self.q_proj = Linear(self.hidden_size, q_proj_size, bias=False)
        self.k_proj = Linear(self.hidden_size, self.kv_size, bias=False)
        self.v_proj = Linear(self.hidden_size, self.kv_size, bias=False)
        # o_proj consumes only the (ungated) attention output of size q_size.
        self.o_proj = Linear(self.q_size, self.hidden_size, bias=False)

        # QK normalization — applied per head (operates on head_dim).
        self.q_norm = GemmaRMSNorm(self.head_dim, eps=tc.rms_norm_eps)
        self.k_norm = GemmaRMSNorm(self.head_dim, eps=tc.rms_norm_eps)

        # RoPE config. `rope_parameters` (newer HF configs) takes precedence
        # over `rope_scaling`; theta falls back to a flat attribute.
        self.partial_rotary_factor = getattr(tc, "partial_rotary_factor", 1.0)
        rope_config = getattr(tc, "rope_parameters", None) or getattr(tc, "rope_scaling", None) or {}
        self.rope_theta = rope_config.get("rope_theta", getattr(tc, "rope_theta", 10000.0))
        self.rotary_dim = int(self.head_dim * self.partial_rotary_factor)

        # RadixAttention layer — delegates to the pluggable attention backend
        self.attn = RadixAttention(
            num_heads=self.num_heads,
            head_dim=self.head_dim,
            scaling=self.scaling,
            num_kv_heads=self.num_kv_heads,
            layer_id=layer_id,
        )

    def forward(
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
        forward_batch: Any,
    ) -> torch.Tensor:
        """Run gated full attention over a flattened token batch.

        Args:
            positions: Position IDs for RoPE, one per token.
            hidden_states: Flattened tokens, shape ``[num_tokens, hidden]``
                (inferred from ``shape[0]`` usage below).
            forward_batch: Backend metadata passed through to RadixAttention.
        """
        seq_len = hidden_states.shape[0]

        q = self.q_proj(hidden_states)
        k = self.k_proj(hidden_states)
        v = self.v_proj(hidden_states)

        if self.attn_output_gate:
            # Split Q into actual Q and gate. Per head, the first head_dim
            # channels are Q and the second head_dim channels are the gate.
            q_gate = q.view(seq_len, self.num_heads, self.head_dim * 2)
            q, gate = q_gate.chunk(2, dim=-1)
            q = q.reshape(seq_len, -1)
            gate = gate.reshape(seq_len, -1)

        # QK norm — reshape so GemmaRMSNorm sees one head per row.
        q = self.q_norm(q.reshape(-1, self.head_dim)).view(seq_len, -1)
        k = self.k_norm(k.reshape(-1, self.head_dim)).view(seq_len, -1)

        # RoPE (inplace; rotary_dim handles partial rotation)
        q = q.view(seq_len, self.num_heads, self.head_dim)
        k = k.view(seq_len, self.num_kv_heads, self.head_dim)
        apply_rope_pos_ids(
            q, k, positions, inplace=True,
            rotary_dim=self.rotary_dim, rope_theta=self.rope_theta,
        )
        q = q.reshape(seq_len, -1)
        k = k.reshape(seq_len, -1)

        # Standard attention via RadixAttention → attn_backend
        attn_output = self.attn(q, k, v, forward_batch)

        # Output gate — elementwise sigmoid gating of the attention output.
        if self.attn_output_gate:
            attn_output = attn_output * torch.sigmoid(gate)

        return self.o_proj(attn_output)


# ---------------------------------------------------------------------------
# Full Attention Decoder Layer
# ---------------------------------------------------------------------------


class Qwen3_5AttentionDecoderLayer(nn.Module):
    """Decoder layer with full attention + MLP (pre-norm, fused residual)."""

    def __init__(self, config, layer_id: int):
        super().__init__()
        tc = _get_text_config(config)
        self.self_attn = Qwen3_5FullAttention(config, layer_id)
        self.mlp = MLP(
            hidden_size=tc.hidden_size,
            intermediate_size=tc.intermediate_size,
            activation=tc.hidden_act,
        )
        self.input_layernorm = GemmaRMSNorm(tc.hidden_size, eps=tc.rms_norm_eps)
        self.post_attention_layernorm = GemmaRMSNorm(tc.hidden_size, eps=tc.rms_norm_eps)

    def forward(
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
        residual: Optional[torch.Tensor],
        forward_batch: Any,
    ):
        """Return ``(hidden_states, residual)`` for the next layer.

        NOTE(review): relies on GemmaRMSNorm's two-argument form returning a
        (normed, new_residual) pair — confirm against pymllm's RMSNorm API.
        """
        # Pre-norm + residual. First layer has no incoming residual.
        if residual is None:
            residual = hidden_states
            hidden_states = self.input_layernorm(hidden_states)
        else:
            hidden_states, residual = self.input_layernorm(hidden_states, residual)

        hidden_states = self.self_attn(positions, hidden_states, forward_batch)

        # Post-attention norm + residual
        hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
        hidden_states = self.mlp(hidden_states)

        return hidden_states, residual


# ---------------------------------------------------------------------------
# Linear Attention (GDN) Decoder Layer
# ---------------------------------------------------------------------------


class Qwen3_5LinearDecoderLayer(nn.Module):
    """Decoder layer with GDN linear attention + MLP.

    ``gdn_layer_idx`` is the sequential index among GDN layers only (used by
    the GDN state cache), distinct from the absolute ``layer_id``.
    """

    def __init__(self, config, layer_id: int, gdn_layer_idx: int = 0):
        super().__init__()
        tc = _get_text_config(config)
        self.linear_attn = GatedDeltaNet(
            hidden_size=tc.hidden_size,
            num_k_heads=getattr(tc, "linear_num_key_heads", 16),
            num_v_heads=getattr(tc, "linear_num_value_heads", 32),
            head_k_dim=getattr(tc, "linear_key_head_dim", 128),
            head_v_dim=getattr(tc, "linear_value_head_dim", 128),
            conv_kernel_size=getattr(tc, "linear_conv_kernel_dim", 4),
            layer_id=layer_id,
            gdn_layer_idx=gdn_layer_idx,
            rms_norm_eps=tc.rms_norm_eps,
        )
        self.mlp = MLP(
            hidden_size=tc.hidden_size,
            intermediate_size=tc.intermediate_size,
            activation=tc.hidden_act,
        )
        self.input_layernorm = GemmaRMSNorm(tc.hidden_size, eps=tc.rms_norm_eps)
        self.post_attention_layernorm = GemmaRMSNorm(tc.hidden_size, eps=tc.rms_norm_eps)

    def forward(
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
        residual: Optional[torch.Tensor],
        forward_batch: Any,
    ):
        """Return ``(hidden_states, residual)``; positions are unused by GDN."""
        if residual is None:
            residual = hidden_states
            hidden_states = self.input_layernorm(hidden_states)
        else:
            hidden_states, residual = self.input_layernorm(hidden_states, residual)

        hidden_states = self.linear_attn(hidden_states, forward_batch)

        hidden_states, residual = self.post_attention_layernorm(hidden_states, residual)
        hidden_states = self.mlp(hidden_states)

        return hidden_states, residual


# ---------------------------------------------------------------------------
# Layer type registry
# ---------------------------------------------------------------------------
# Layer type registry
# ---------------------------------------------------------------------------

# Maps a config layer-type string to its decoder-layer class.
# NOTE(review): the builder in Qwen3_5ForCausalLM.__init__ branches on the
# string directly and does not consult this dict — kept for external lookup?
_DECODER_LAYER_TYPES = {
    "attention": Qwen3_5AttentionDecoderLayer,
    "linear_attention": Qwen3_5LinearDecoderLayer,
}


# ---------------------------------------------------------------------------
# Qwen3.5 Language Model (dense variant)
# ---------------------------------------------------------------------------


class Qwen3_5ForCausalLM(nn.Module):
    """Qwen3.5 causal language model with hybrid attention.

    Alternates between full attention and GDN linear attention layers.
    Dense (non-MoE) variant.
    """

    def __init__(self, config):
        super().__init__()
        tc = _get_text_config(config)
        self.config = tc
        self.hidden_size = tc.hidden_size
        self.vocab_size = tc.vocab_size

        # Embedding
        self.embed_tokens = VocabParallelEmbedding(tc.vocab_size, tc.hidden_size)

        # Build hybrid decoder layers with sequential GDN indexing:
        # gdn_layer_idx counts only GDN layers so their state caches pack
        # densely, independent of the absolute layer index.
        layer_types = _get_layer_types(tc)
        self.layer_types = layer_types
        self.layers = nn.ModuleList()
        gdn_count = 0
        self.full_attn_layer_ids = set()
        for idx in range(tc.num_hidden_layers):
            layer_type = layer_types[idx]
            if layer_type == "linear_attention":
                self.layers.append(
                    Qwen3_5LinearDecoderLayer(config, idx, gdn_layer_idx=gdn_count)
                )
                gdn_count += 1
            else:
                self.layers.append(
                    Qwen3_5AttentionDecoderLayer(config, idx)
                )
                self.full_attn_layer_ids.add(idx)
        self.num_gdn_layers = gdn_count

        # Final norm
        self.norm = GemmaRMSNorm(tc.hidden_size, eps=tc.rms_norm_eps)

        logger.info(
            "Qwen3_5ForCausalLM: %d layers (%d attention + %d GDN)",
            tc.num_hidden_layers,
            len(self.full_attn_layer_ids),
            self.num_gdn_layers,
        )

    @torch.no_grad()
    def forward(
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
        forward_batch: Any,
        input_embeds: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Return final hidden states for a flattened token batch.

        When ``input_embeds`` is given (e.g. multimodal embeddings already
        merged), the token embedding lookup is skipped.
        """
        if input_embeds is None:
            hidden_states = self.embed_tokens(input_ids)
        else:
            hidden_states = input_embeds

        residual = None
        for layer in self.layers:
            hidden_states, residual = layer(
                positions=positions,
                hidden_states=hidden_states,
                residual=residual,
                forward_batch=forward_batch,
            )

        # Final normalization — fold in the last residual if present.
        if residual is not None:
            hidden_states, _ = self.norm(hidden_states, residual)
        else:
            hidden_states = self.norm(hidden_states)

        return hidden_states

    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
        """Load HuggingFace checkpoint weights with name remapping.

        Skips rotary caches, MTP heads and vision weights; strips the
        ``model.`` / ``model.language_model.`` prefixes; fuses
        ``gate_proj``/``up_proj`` into the stacked ``gate_up_proj``.
        Returns the set of (remapped) names actually loaded.
        """
        # (target stacked name, checkpoint shard name, shard index)
        stacked_params_mapping = [
            ("gate_up_proj", "gate_proj", 0),
            ("gate_up_proj", "up_proj", 1),
        ]

        params_dict = dict(self.named_parameters())
        loaded: Set[str] = set()

        for name, weight in weights:
            if "rotary_emb.inv_freq" in name:
                continue
            if "mtp" in name:
                continue
            if "visual" in name:
                continue
            if "language_model" in name:
                name = name.replace("model.language_model.", "")
            if name.startswith("model."):
                name = name[len("model."):]
            # NOTE: do NOT strip .self_attn — pymllm keeps it as a submodule

            # Handle stacked params (gate_up_proj = gate_proj + up_proj)
            matched = False
            for param_name, weight_name, shard_id in stacked_params_mapping:
                if weight_name not in name:
                    continue
                if "mlp.experts" in name:
                    continue
                name = name.replace(weight_name, param_name)
                if name not in params_dict:
                    continue
                param = params_dict[name]
                # gate_up_proj is a plain Linear — manually place each shard
                # into its half of dim 0 (gate first, then up).
                output_dim = param.shape[0] // 2
                param.data[shard_id * output_dim : (shard_id + 1) * output_dim].copy_(
                    weight
                )
                matched = True
                break

            if not matched:
                if name not in params_dict:
                    continue
                param = params_dict[name]
                loader = getattr(param, "weight_loader", None)
                if loader is not None:
                    loader(param, weight)
                else:
                    # Squeeze conv1d weight from [C, 1, K] to [C, K]
                    if weight.dim() != param.dim():
                        weight = weight.squeeze()
                    param.data.copy_(weight)

            loaded.add(name)

        logger.info("Loaded %d parameter tensors for Qwen3_5ForCausalLM", len(loaded))
        return loaded


# ---------------------------------------------------------------------------
# Qwen3.5 Vision-Language Model
# ---------------------------------------------------------------------------


class Qwen3_5ForConditionalGeneration(nn.Module):
    """Qwen3.5 multimodal model (text + vision).

    Inherits vision encoder from Qwen3VL and uses Qwen3.5's hybrid
    language model.
    """

    def __init__(self, config):
        super().__init__()
        # Imported lazily to avoid a hard dependency when text-only.
        from pymllm.models.qwen3_vl import (
            Qwen3VLVisionModel,
        )

        self.config = config
        tc = _get_text_config(config)

        # Vision encoder (reuse Qwen3VL's vision model). Defaults below
        # mirror the Qwen3-VL vision config; overridden by vision_config.
        vision_config = getattr(config, "vision_config", None)
        if vision_config is not None:
            self.visual = Qwen3VLVisionModel(
                depth=getattr(vision_config, "depth", 27),
                hidden_size=getattr(vision_config, "hidden_size", 1152),
                hidden_act=getattr(vision_config, "hidden_act", "gelu_pytorch_tanh"),
                intermediate_size=getattr(vision_config, "intermediate_size", 4304),
                num_heads=getattr(vision_config, "num_heads", 16),
                in_channels=getattr(vision_config, "in_channels", 3),
                patch_size=getattr(vision_config, "patch_size", 16),
                spatial_merge_size=getattr(vision_config, "spatial_merge_size", 2),
                temporal_patch_size=getattr(vision_config, "temporal_patch_size", 2),
                out_hidden_size=getattr(vision_config, "out_hidden_size", 3584),
                num_position_embeddings=getattr(
                    vision_config, "num_position_embeddings", 2304
                ),
                deepstack_visual_indexes=getattr(
                    vision_config, "deepstack_visual_indexes", [8, 16, 24]
                ),
                norm_eps=getattr(tc, "rms_norm_eps", 1e-6),
            )
        else:
            self.visual = None

        # Language model
        self.model = Qwen3_5ForCausalLM(config)

        # Expose hybrid model metadata for ModelRunner
        self.num_gdn_layers = self.model.num_gdn_layers
        self.full_attn_layer_ids = self.model.full_attn_layer_ids

        # LM head (tied to embedding when tie_word_embeddings=True)
        self.lm_head = Linear(tc.hidden_size, tc.vocab_size, bias=False)
        if getattr(tc, "tie_word_embeddings", False):
            self.lm_head.weight = self.model.embed_tokens.weight

        # Vision token IDs (defaults are the standard Qwen vision IDs).
        self.image_token_id = getattr(config, "image_token_id", 151655)
        self.video_token_id = getattr(config, "video_token_id", 151656)

    @torch.no_grad()
    def forward(
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
        forward_batch: Any,
        input_embeds: Optional[torch.Tensor] = None,
        pixel_values: Optional[torch.Tensor] = None,
        image_grid_thw: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Return LM logits; merges vision embeddings into the token stream.

        NOTE(review): assumes the number of image/video placeholder tokens in
        ``input_ids`` equals the number of merged visual tokens produced by
        the encoder — confirm against the processor.
        """
        # Process vision inputs if provided
        if input_embeds is None and pixel_values is not None and self.visual is not None:
            input_embeds = self.model.embed_tokens(input_ids)
            # Run vision encoder
            visual_embeds = self.visual(pixel_values, grid_thw=image_grid_thw)
            # Replace image/video token positions with visual embeddings
            mask = (input_ids == self.image_token_id) | (input_ids == self.video_token_id)
            if mask.any():
                input_embeds[mask] = visual_embeds.reshape(-1, visual_embeds.shape[-1])

        hidden_states = self.model(
            input_ids=input_ids,
            positions=positions,
            forward_batch=forward_batch,
            input_embeds=input_embeds,
        )

        # LM head
        logits = self.lm_head(hidden_states)
        return logits

    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
        """Load weights, dispatching visual vs language params."""
        visual_weights = []
        language_weights = []

        for name, weight in weights:
            # NOTE(review): the second condition is subsumed by the first
            # ("model.visual" always contains "visual").
            if "visual" in name or "model.visual" in name:
                # Normalize visual weight names
                name = name.replace("model.visual.", "visual.")
                name = name.replace("attn.qkv.", "attn.qkv_proj.")
                visual_weights.append((name, weight))
            else:
                language_weights.append((name, weight))

        # Load language model weights
        self.model.load_weights(language_weights)

        # Load visual weights — names not present in this module are
        # silently skipped (best-effort load).
        if self.visual is not None and visual_weights:
            params_dict = dict(self.named_parameters())
            for name, weight in visual_weights:
                if name in params_dict:
                    param = params_dict[name]
                    loader = getattr(param, "weight_loader", None)
                    if loader is not None:
                        loader(param, weight)
                    else:
                        param.data.copy_(weight)

        logger.info("Qwen3_5ForConditionalGeneration weights loaded")
diff --git a/pymllm/models/qwen3_vl.py b/pymllm/models/qwen3_vl.py
new file mode 100644
index 00000000..3bee27c8
--- /dev/null
+++ b/pymllm/models/qwen3_vl.py
@@ -0,0 +1,1329 @@
# Copyright 2025 Qwen Team
# Copyright 2025 SGLang Team
# Adapted for pymllm
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inference-only Qwen3-VL model for pymllm.

Adapted from sglang's Qwen3-VL implementation for pymllm's single-GPU
inference architecture. Uses pymllm layers (RadixAttention, RMSNorm, MLP)
and conforms to the pymllm forward interface::

    model.forward(input_ids, positions, forward_batch)

Designed for a single accelerator card — no tensor / pipeline parallelism.
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from pymllm.layers import RMSNorm, apply_mrope
from pymllm.layers.attention.radix_attention import RadixAttention
from pymllm.layers.mlp import MLP

if TYPE_CHECKING:
    from pymllm.engine.forward_batch import ForwardBatch

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Vision Encoder
# ---------------------------------------------------------------------------


class Qwen3VisionMLP(nn.Module):
    """MLP block for the vision encoder.

    Two dense layers with a configurable activation in between
    (GELU-tanh / GELU / SiLU fallback).
    """

    def __init__(
        self,
        in_features: int,
        hidden_features: int,
        hidden_act: str = "silu",
        bias: bool = True,
    ):
        super().__init__()
        self.linear_fc1 = nn.Linear(in_features, hidden_features, bias=bias)
        self.linear_fc2 = nn.Linear(hidden_features, in_features, bias=bias)
        if hidden_act == "gelu_pytorch_tanh":
            self.act = nn.GELU(approximate="tanh")
        elif hidden_act == "gelu":
            self.act = nn.GELU()
        else:
            # Any other string falls back to SiLU.
            self.act = nn.SiLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear_fc2(self.act(self.linear_fc1(x)))


class Qwen3VLVisionPatchEmbed(nn.Module):
    """3D convolution patch embedding for video/image patchification."""

    def __init__(
        self,
        patch_size: int = 16,
        temporal_patch_size: int = 2,
        in_channels: int = 3,
        embed_dim: int = 1152,
    ):
        super().__init__()
        self.patch_size = patch_size
        self.temporal_patch_size = temporal_patch_size
        self.in_channels = in_channels
        self.embed_dim = embed_dim

        # Stride == kernel: non-overlapping (T, H, W) patches.
        kernel_size = [temporal_patch_size, patch_size, patch_size]
        self.proj = nn.Conv3d(
            in_channels,
            embed_dim,
            kernel_size=kernel_size,
            stride=kernel_size,
            bias=True,
        )

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """Map flattened pixel patches to ``[num_patches, embed_dim]``.

        Input is reshaped to ``[-1, C, T_patch, P, P]`` — i.e. one conv
        window per patch — so each conv application emits one embedding.
        """
        target_dtype = self.proj.weight.dtype
        hidden_states = hidden_states.view(
            -1,
            self.in_channels,
            self.temporal_patch_size,
            self.patch_size,
            self.patch_size,
        )
        # Cast pixels to the conv weight dtype before the conv.
        hidden_states = self.proj(hidden_states.to(dtype=target_dtype)).view(
            -1, self.embed_dim
        )
        return hidden_states


def _rotate_half(x: torch.Tensor) -> torch.Tensor:
    """Rotate half the hidden dims of the input for RoPE."""
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)


class Qwen3VisionAttention(nn.Module):
    """Multi-head self-attention for the vision encoder (no KV cache)."""

    def __init__(self, embed_dim: int, num_heads: int):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        self.qkv_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)
        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=True)

    def forward(
        self,
        x: torch.Tensor,
        cu_seqlens: torch.Tensor,
        rotary_pos_emb_cos: torch.Tensor,
        rotary_pos_emb_sin: torch.Tensor,
    ) -> torch.Tensor:
        """Forward pass with variable-length sequences via cu_seqlens.

        Args:
            x: [total_tokens, embed_dim]
            cu_seqlens: [num_seqs + 1] cumulative sequence lengths
            rotary_pos_emb_cos: [total_tokens, rotary_dim]
            rotary_pos_emb_sin: [total_tokens, rotary_dim]
        """
        seq_len = x.shape[0]
        qkv = self.qkv_proj(x)
        q, k, v = qkv.reshape(seq_len, 3, self.num_heads, self.head_dim).unbind(dim=1)

        # Apply rotary position embedding.
        # cos/sin are [total_tokens, head_dim // 2]. Following sglang's
        # VisionAttention: double them to full head_dim and apply RoPE to
        # all head dimensions (the rotation pairs (q[i], q[i + head_dim//2])).
        cos = rotary_pos_emb_cos
        sin = rotary_pos_emb_sin
        if cos.shape[-1] * 2 == self.head_dim:
            cos = torch.cat([cos, cos], dim=-1)
            sin = torch.cat([sin, sin], dim=-1)

        cos = cos.unsqueeze(1)  # [seq, 1, head_dim]
        sin = sin.unsqueeze(1)  # [seq, 1, head_dim]

        q = q * cos + _rotate_half(q) * sin
        k = k * cos + _rotate_half(k) * sin

        # Scaled dot-product attention per variable-length sequence.
        # NOTE(review): Python loop over images — O(num_seqs) SDPA calls;
        # acceptable for typical batch sizes, revisit if it shows in profiles.
        output = torch.empty_like(q)
        num_seqs = cu_seqlens.shape[0] - 1
        for i in range(num_seqs):
            start = cu_seqlens[i].item()
            end = cu_seqlens[i + 1].item()
            qi = q[start:end].transpose(0, 1).unsqueeze(0)  # [1, heads, seq, dim]
            ki = k[start:end].transpose(0, 1).unsqueeze(0)
            vi = v[start:end].transpose(0, 1).unsqueeze(0)
            oi = F.scaled_dot_product_attention(qi, ki, vi)
            output[start:end] = oi.squeeze(0).transpose(0, 1)

        output = output.reshape(seq_len, self.embed_dim)
        return self.out_proj(output)


class Qwen3VisionBlock(nn.Module):
    """Single vision transformer block (pre-LayerNorm, attention + MLP)."""

    def __init__(
        self,
        dim: int,
        num_heads: int,
        intermediate_dim: int,
        hidden_act: str = "silu",
        norm_eps: float = 1e-6,
    ):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim, eps=norm_eps)
        self.norm2 = nn.LayerNorm(dim, eps=norm_eps)
        self.attn = Qwen3VisionAttention(embed_dim=dim, num_heads=num_heads)
        self.mlp = Qwen3VisionMLP(
            dim, intermediate_dim, hidden_act=hidden_act, bias=True
        )

    def forward(
        self,
        x: torch.Tensor,
        cu_seqlens: torch.Tensor,
        rotary_pos_emb_cos: torch.Tensor,
        rotary_pos_emb_sin: torch.Tensor,
    ) -> torch.Tensor:
        x = x + self.attn(
            self.norm1(x),
            cu_seqlens=cu_seqlens,
            rotary_pos_emb_cos=rotary_pos_emb_cos,
            rotary_pos_emb_sin=rotary_pos_emb_sin,
        )
        x = x + self.mlp(self.norm2(x))
        return x


class Qwen3VLVisionPatchMerger(nn.Module):
    """Merges spatial patches to reduce sequence length.

    Groups ``spatial_merge_size ** 2`` consecutive patch tokens and projects
    them to the language model hidden dimension.
    """

    def __init__(
        self,
        dim: int,
        context_dim: int,
        spatial_merge_size: int = 2,
        use_postshuffle_norm: bool = False,
        norm_eps: float = 1e-6,
    ):
        super().__init__()
        self.hidden_size = context_dim * (spatial_merge_size**2)
        self.use_postshuffle_norm = use_postshuffle_norm
        # Postshuffle: normalize after grouping (over hidden_size);
        # otherwise normalize each patch token (over context_dim) first.
        self.norm = nn.LayerNorm(
            self.hidden_size if use_postshuffle_norm else context_dim, eps=norm_eps
        )
        self.linear_fc1 = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
        self.act_fn = nn.GELU()
        self.linear_fc2 = nn.Linear(self.hidden_size, dim, bias=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.use_postshuffle_norm:
            x = self.norm(x.view(-1, self.hidden_size))
        else:
            x = self.norm(x).view(-1, self.hidden_size)
        x = self.act_fn(self.linear_fc1(x))
        return self.linear_fc2(x)


class Qwen3VLVisionModel(nn.Module):
    """Complete vision encoder for Qwen3-VL.

    Produces patch embeddings from raw pixel values, applies a stack of
    vision transformer blocks with 3D rotary embeddings, then merges
    spatial patches. Supports "deep stack" where intermediate layer
    outputs are captured and concatenated to the final output.
    """

    def __init__(
        self,
        depth: int = 27,
        hidden_size: int = 1152,
        hidden_act: str = "gelu_pytorch_tanh",
        intermediate_size: int = 4304,
        num_heads: int = 16,
        in_channels: int = 3,
        patch_size: int = 16,
        spatial_merge_size: int = 2,
        temporal_patch_size: int = 2,
        out_hidden_size: int = 3584,
        num_position_embeddings: int = 2304,
        deepstack_visual_indexes: Optional[List[int]] = None,
        norm_eps: float = 1e-6,
    ):
        super().__init__()
        if deepstack_visual_indexes is None:
            deepstack_visual_indexes = [8, 16, 24]

        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.num_position_embeddings = num_position_embeddings
        # Learned pos-embed table is a square grid of side sqrt(N).
        self.num_grid_per_side = int(num_position_embeddings**0.5)
        self.patch_size = patch_size
        self.spatial_merge_size = spatial_merge_size
        self.temporal_patch_size = temporal_patch_size
        self.deepstack_visual_indexes = deepstack_visual_indexes
        # Total output dim = out_hidden_size * (1 main + N deepstack mergers)
        self.out_hidden_size = out_hidden_size * (1 + len(deepstack_visual_indexes))

        self.patch_embed = Qwen3VLVisionPatchEmbed(
            patch_size=patch_size,
            temporal_patch_size=temporal_patch_size,
            in_channels=in_channels,
            embed_dim=hidden_size,
        )

        self.pos_embed = nn.Embedding(num_position_embeddings, hidden_size)

        head_dim = hidden_size // num_heads
        self._init_rope_cache(head_dim)

        self.blocks = nn.ModuleList(
            [
                Qwen3VisionBlock(
                    dim=hidden_size,
                    num_heads=num_heads,
                    intermediate_dim=intermediate_size,
                    hidden_act=hidden_act,
                    norm_eps=norm_eps,
                )
                for _ in range(depth)
            ]
        )

        self.merger = Qwen3VLVisionPatchMerger(
            dim=out_hidden_size,
            context_dim=hidden_size,
            spatial_merge_size=spatial_merge_size,
            norm_eps=norm_eps,
        )

        # One extra merger per deepstack tap; postshuffle-norm variant.
        self.deepstack_merger_list = nn.ModuleList(
            [
                Qwen3VLVisionPatchMerger(
                    dim=out_hidden_size,
                    context_dim=hidden_size,
                    spatial_merge_size=spatial_merge_size,
                    use_postshuffle_norm=True,
                    norm_eps=norm_eps,
                )
                for _ in range(len(deepstack_visual_indexes))
            ]
        )

    def _init_rope_cache(self, head_dim: int, max_grid_size: int = 8192):
        """Precompute cos/sin cache for 2D rotary embeddings.

        Caches [max_grid_size, head_dim // 4] tables; each spatial axis
        (h, w) indexes one copy, which are concatenated in rot_pos_emb.
        """
        rotary_dim = head_dim // 2
        inv_freq = 1.0 / (
            10000.0
            ** (torch.arange(0, rotary_dim, 2, dtype=torch.float32) / rotary_dim)
        )
        t = torch.arange(max_grid_size, dtype=torch.float32)
        freqs = torch.outer(t, inv_freq)
        # Non-persistent: recomputed on construction, never checkpointed.
        self.register_buffer("cos_cache", torch.cos(freqs), persistent=False)
        self.register_buffer("sin_cache", torch.sin(freqs), persistent=False)

    @property
    def dtype(self) -> torch.dtype:
        return self.patch_embed.proj.weight.dtype

    @property
    def device(self) -> torch.device:
        return self.patch_embed.proj.weight.device

    # -- Rotary position embedding helpers --

    @staticmethod
    def _rot_pos_ids(h: int, w: int, spatial_merge_size: int) -> torch.Tensor:
        """Compute 2D rotary position IDs for a grid of *h* x *w* patches.

        The patches are re-ordered to group ``spatial_merge_size ** 2``
        neighbours together (matching the merger's token order).

        Returns tensor of shape ``[h*w, 2]`` with ``(height_pos, width_pos)``.
        """
        merge = spatial_merge_size
        h_ids = torch.arange(h).unsqueeze(1).expand(-1, w)
        w_ids = torch.arange(w).unsqueeze(0).expand(h, -1)

        # Reshape then permute so each merge-window's tokens are contiguous.
        h_ids = h_ids.reshape(h // merge, merge, w // merge, merge)
        w_ids = w_ids.reshape(h // merge, merge, w // merge, merge)

        h_ids = h_ids.permute(0, 2, 1, 3).flatten()
        w_ids = w_ids.permute(0, 2, 1, 3).flatten()

        return torch.stack([h_ids, w_ids], dim=-1)

    def rot_pos_emb(
        self, grid_thw: List[List[int]]
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Compute rotary pos-emb cos/sin for all images/videos in the batch."""
        pos_ids = []
        for t, h, w in grid_thw:
            base = self._rot_pos_ids(h, w, self.spatial_merge_size)
            # Video frames repeat the same spatial grid t times.
            pos_ids.append(base if t == 1 else base.repeat(t, 1))

        pos_ids = torch.cat(pos_ids, dim=0).to(self.device, non_blocking=True)
        # Index (h, w) tables and flatten -> [total_tokens, head_dim // 2].
        cos_combined = self.cos_cache[pos_ids].flatten(1)
        sin_combined = self.sin_cache[pos_ids].flatten(1)
        return cos_combined, sin_combined

    # -- Position embedding interpolation --

    def _get_interpolation_indices(self, dim_size: int) -> np.ndarray:
        """Fractional source indices for resampling one axis of the pos grid."""
        indices = (np.arange(dim_size, dtype=np.float32) + 0.5) * (
            self.num_grid_per_side / dim_size
        ) - 0.5
        return np.clip(indices, 0, self.num_grid_per_side - 1)

    def _calculate_indices_and_weights(
        self, h_idxs: np.ndarray, w_idxs: np.ndarray
    ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
        """Compute bilinear interpolation indices and weights.

        Returns four flat index arrays (the four surrounding grid corners)
        and their matching weights; weights for each output cell sum to 1.
        """
        side = self.num_grid_per_side
        h_f = np.floor(h_idxs).astype(np.int64)
        h_c = np.clip(h_f + 1, 0, side - 1)
        dh = h_idxs - h_f
        w_f = np.floor(w_idxs).astype(np.int64)
        w_c = np.clip(w_f + 1, 0, side - 1)
        dw = w_idxs - w_f

        indices = [
            (h_f[:, None] * side + w_f).flatten(),
            (h_f[:, None] * side + w_c).flatten(),
            (h_c[:, None] * side + w_f).flatten(),
            (h_c[:, None] * side + w_c).flatten(),
        ]
        weights = [
            ((1 - dh)[:, None] * (1 - dw)).flatten(),
            ((1 - dh)[:, None] * dw).flatten(),
            (dh[:, None] * (1 - dw)).flatten(),
            (dh[:, None] * dw).flatten(),
        ]
        return indices, weights

    def _get_position_embedding(
        self,
        patch_pos_embeds: List[torch.Tensor],
        grid_ts: List[int],
        grid_hs: List[int],
        grid_ws: List[int],
    ) -> torch.Tensor:
        """Tile and reorganize position embeddings to align with the merged token order."""
        result_parts = []
        merge = self.spatial_merge_size
        for pos_embed, t, h, w in zip(patch_pos_embeds, grid_ts, grid_hs, grid_ws):
            pos_embed = pos_embed.repeat(t, 1)
            # Same merge-window reordering as _rot_pos_ids.
            pos_embed = (
                pos_embed.view(t, h // merge, merge, w // merge, merge, -1)
                .permute(0, 1, 3, 2, 4, 5)
                .flatten(0, 4)
            )
            result_parts.append(pos_embed)
        return torch.cat(result_parts, dim=0)

    def fast_pos_embed_interpolate(self, grid_thw: torch.Tensor) -> torch.Tensor:
        """Interpolate position embeddings via bilinear interpolation.

        Resamples the fixed square pos-embed grid to each image's (h, w)
        grid; indices/weights are built on CPU with numpy, the weighted sum
        runs on the embedding's device.
        """
        grid_thw_cpu = grid_thw.cpu().numpy()
        temporal_dims = grid_thw_cpu[:, 0].tolist()
        height_dims = grid_thw_cpu[:, 1].tolist()
        width_dims = grid_thw_cpu[:, 2].tolist()

        device = self.pos_embed.weight.device
        dtype = self.pos_embed.weight.dtype

        patches_size = [h * w for h, w in zip(height_dims, width_dims)]
        total_patches = sum(patches_size)
        all_indices_np = np.zeros((4, total_patches), dtype=np.int64)
        all_weights_np = np.zeros((4, total_patches), dtype=np.float32)

        current_idx = 0
        for _t, h, w in zip(temporal_dims, height_dims, width_dims):
            h_idxs = self._get_interpolation_indices(h)
            w_idxs = self._get_interpolation_indices(w)
            indices, weights = self._calculate_indices_and_weights(h_idxs, w_idxs)
            end_idx = current_idx + h * w
            for i in range(4):
                all_indices_np[i, current_idx:end_idx] = indices[i]
                all_weights_np[i, current_idx:end_idx] = weights[i]
            current_idx = end_idx

        idx_tensor = torch.from_numpy(all_indices_np).to(device)
        weight_tensor = torch.from_numpy(all_weights_np).to(dtype=dtype, device=device)

        pos_embeds = self.pos_embed(idx_tensor.view(-1))
        pos_embeds = pos_embeds.view(4, total_patches, -1)
        # Weighted sum of the four corner embeddings per patch.
        patch_pos_embeds = (pos_embeds * weight_tensor.unsqueeze(-1)).sum(dim=0)
        patch_pos_embeds = patch_pos_embeds.split(patches_size)
        return self._get_position_embedding(
            list(patch_pos_embeds), temporal_dims, height_dims, width_dims
        )

    # -- Forward --

    def forward(
        self,
        x: torch.Tensor,
        grid_thw: torch.Tensor,
    ) -> torch.Tensor:
        """Run the vision encoder.

        Args:
            x: Pixel values, shape ``[total_patches, patch_dim]``.
            grid_thw: Grid dimensions ``[num_images, 3]`` with ``(T, H, W)``.

        Returns:
            Vision features of shape
            ``[num_merged_tokens, out_hidden_size * (1 + num_deepstack)]``.
        """
        x = x.to(device=self.device, dtype=self.dtype)
        x = self.patch_embed(x)

        if isinstance(grid_thw, list):
            grid_thw_list = grid_thw
            grid_thw = torch.tensor(grid_thw, dtype=torch.int32)
        else:
            grid_thw_list = grid_thw.tolist()

        pos_embeds = self.fast_pos_embed_interpolate(grid_thw)
        x += pos_embeds

        rotary_pos_emb_cos, rotary_pos_emb_sin = self.rot_pos_emb(grid_thw_list)

        cu_seqlens = _compute_cu_seqlens_from_grid(grid_thw)
        cu_seqlens = cu_seqlens.to(self.device, non_blocking=True)

        deepstack_features = []
        ds_idx = 0

        for layer_num, blk in enumerate(self.blocks):
            x = blk(x, cu_seqlens, rotary_pos_emb_cos, rotary_pos_emb_sin)

            if layer_num in self.deepstack_visual_indexes:
                # x is [total_tokens, hidden]. The merger expects the last
                # dim to be context_dim so it can group spatial_merge_size^2
                # tokens; reshape to [total_tokens, 1, hidden] so that the
                # `.view(-1, hidden_size)` inside the merger collapses the
                # spatial merge correctly.
                ds_feat = self.deepstack_merger_list[ds_idx](x.unsqueeze(1))
                deepstack_features.append(ds_feat)
                ds_idx += 1

        x = self.merger(x.unsqueeze(1))

        # Concatenate main + deepstack features along the feature dimension.
        # Result: [num_merged_tokens, out_hidden_size * (1 + num_deepstack)]
        hidden_states = torch.cat([x] + deepstack_features, dim=-1)
        return hidden_states


def _compute_cu_seqlens_from_grid(grid_thw: torch.Tensor) -> torch.Tensor:
    """Compute cumulative sequence lengths from grid dimensions.

    One sequence per image/video: length = T * H * W patches. Returns an
    int32 tensor of shape ``[num_images + 1]`` starting at 0.
    """
    grid_np = grid_thw.cpu().numpy()
    seq_lens = (grid_np[:, 0] * grid_np[:, 1] * grid_np[:, 2]).astype(np.int32)
    cu_seqlens = np.concatenate([[0], np.cumsum(seq_lens)])
    return torch.tensor(cu_seqlens, dtype=torch.int32)


def _build_cos_sin_cache(
    head_dim: int,
    rope_theta: float,
    max_pos: int,
    dtype: torch.dtype,
) -> torch.Tensor:
    """Build a [max_pos, head_dim] cos/sin cache for M-RoPE.

    Layout: first ``head_dim // 2`` columns are cos values, second half are sin.
    Each row corresponds to one position index.
    """
    inv_freq = 1.0 / (
        rope_theta ** (torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim)
    )
    t = torch.arange(max_pos, dtype=torch.float32)
    freqs = torch.outer(t, inv_freq)  # [max_pos, head_dim // 2]
    return torch.cat([torch.cos(freqs), torch.sin(freqs)], dim=-1).to(dtype)


def get_rope_index(
    input_ids: torch.Tensor,
    image_grid_thw: Optional[torch.Tensor],
    image_token_id: int,
    vision_start_token_id: int,
    spatial_merge_size: int,
) -> Tuple[torch.Tensor, int]:
    """Compute M-RoPE 3-D position IDs for one sequence.

    For text tokens all three (temporal, height, width) indices are equal to
    the sequential counter. For image tokens the indices follow the spatial
    grid ``(t, h, w)``.

    NOTE(review): assumes exactly ``llm_grid_t * llm_grid_h * llm_grid_w``
    placeholder tokens follow each ``vision_start`` token — confirm against
    the processor's token expansion.

    Args:
        input_ids: Token IDs for one sequence, shape ``[T]``.
        image_grid_thw: Grid dimensions for every image in the sequence,
            shape ``[num_images, 3]``. ``None`` when there are no images.
        image_token_id: Token ID used as placeholder for image patches.
        vision_start_token_id: Token ID that precedes each image block.
        spatial_merge_size: Number of patches merged per spatial dimension
            (e.g. 2 → 2x2 merge, so llm_grid_h = H // 2).

    Returns:
        ``(position_ids, mrope_position_delta)`` where ``position_ids`` has
        shape ``[3, T]`` and ``mrope_position_delta`` is a Python ``int``
        equal to ``max_position_used + 1 - T``.
    """
    total_tokens = input_ids.shape[0]
    device = input_ids.device
    position_ids = torch.zeros(3, total_tokens, dtype=torch.long, device=device)

    # Text-only fast path: all three axes are the sequential counter.
    if image_grid_thw is None or image_grid_thw.shape[0] == 0:
        pos = torch.arange(total_tokens, dtype=torch.long, device=device)
        position_ids[0] = pos
        position_ids[1] = pos
        position_ids[2] = pos
        return position_ids, 0

    input_ids_cpu = input_ids.cpu().tolist()
    grid_thw_list = image_grid_thw.cpu().tolist()

    llm_pos_ids_start = 0
    image_idx = 0
    i = 0

    while i < total_tokens:
        token = input_ids_cpu[i]

        if token == vision_start_token_id and image_idx < len(grid_thw_list):
            # The vision_start token itself gets a regular sequential position.
            position_ids[:, i] = llm_pos_ids_start
            llm_pos_ids_start += 1
            i += 1

            # Compute LLM-side grid dimensions (after spatial merging).
            t_g = int(grid_thw_list[image_idx][0])
            h_g = int(grid_thw_list[image_idx][1])
            w_g = int(grid_thw_list[image_idx][2])
            llm_grid_t = t_g
            llm_grid_h = h_g // spatial_merge_size
            llm_grid_w = w_g // spatial_merge_size
            num_image_tokens = llm_grid_t * llm_grid_h * llm_grid_w

            # Build per-patch 3-D indices.
            t_idx = (
                torch.arange(llm_grid_t, device=device)
                .view(-1, 1, 1)
                .expand(-1, llm_grid_h, llm_grid_w)
                .flatten()
            )
            h_idx = (
                torch.arange(llm_grid_h, device=device)
                .view(1, -1, 1)
                .expand(llm_grid_t, -1, llm_grid_w)
                .flatten()
            )
            w_idx = (
                torch.arange(llm_grid_w, device=device)
                .view(1, 1, -1)
                .expand(llm_grid_t, llm_grid_h, -1)
                .flatten()
            )

            img_start = i
            img_end = i + num_image_tokens
            position_ids[0, img_start:img_end] = t_idx + llm_pos_ids_start
            position_ids[1, img_start:img_end] = h_idx + llm_pos_ids_start
            position_ids[2, img_start:img_end] = w_idx + llm_pos_ids_start

            # Advance the counter past the largest axis extent used.
            llm_pos_ids_start += max(llm_grid_t, llm_grid_h, llm_grid_w)
            i += num_image_tokens
            image_idx += 1
        else:
            # Text token (including vision_end and all non-image tokens).
            position_ids[:, i] = llm_pos_ids_start
            llm_pos_ids_start += 1
            i += 1

    mrope_position_delta = llm_pos_ids_start - total_tokens
    return position_ids, mrope_position_delta


# ---------------------------------------------------------------------------
# Text Decoder (Language Model)
# ---------------------------------------------------------------------------


class Qwen3VLAttention(nn.Module):
    """Attention layer for the Qwen3-VL text decoder.

    Uses QK-norm (per-head RMSNorm on Q and K before RoPE) and
    :class:`RadixAttention` for KV-cached inference. Applies
    interleaved M-RoPE with a precomputed cos/sin cache.
+ """ + + def __init__( + self, + hidden_size: int, + num_heads: int, + num_kv_heads: int, + head_dim: int, + layer_id: int, + rope_theta: float = 5_000_000.0, + rms_norm_eps: float = 1e-6, + mrope_section: Tuple[int, int, int] = (24, 20, 20), + mrope_interleaved: bool = True, + max_position_embeddings: int = 32768, + ): + super().__init__() + self.num_heads = num_heads + self.num_kv_heads = num_kv_heads + self.head_dim = head_dim + self.q_size = num_heads * head_dim + self.kv_size = num_kv_heads * head_dim + self.scaling = head_dim**-0.5 + self.mrope_section = list(mrope_section) + self.mrope_interleaved = mrope_interleaved + + # Fused QKV projection + self.qkv_proj = nn.Linear( + hidden_size, self.q_size + 2 * self.kv_size, bias=False + ) + + # Output projection + self.o_proj = nn.Linear(num_heads * head_dim, hidden_size, bias=False) + + # QK normalization + self.q_norm = RMSNorm(head_dim, eps=rms_norm_eps) + self.k_norm = RMSNorm(head_dim, eps=rms_norm_eps) + + # Precomputed M-RoPE cos/sin cache: [max_pos, head_dim] + cos_sin = _build_cos_sin_cache( + head_dim, rope_theta, max_position_embeddings, torch.float32 + ) + self.register_buffer("cos_sin_cache", cos_sin, persistent=False) + + # Radix attention (single-GPU: heads == tp_heads) + self.attn = RadixAttention( + num_heads=num_heads, + head_dim=head_dim, + scaling=self.scaling, + num_kv_heads=num_kv_heads, + layer_id=layer_id, + ) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + forward_batch: "ForwardBatch", + ) -> torch.Tensor: + qkv = self.qkv_proj(hidden_states) + q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) + + # Per-head QK normalization + q = self.q_norm(q.view(-1, self.num_heads, self.head_dim)) + k = self.k_norm(k.view(-1, self.num_kv_heads, self.head_dim)) + + # Apply M-RoPE. positions is [3, T] for prefill (3-D) or may arrive + # as [T] for purely text-only batches; expand to [3, T] in that case. 
+ if positions.ndim == 1: + positions = positions.unsqueeze(0).expand(3, -1) + q, k = apply_mrope( + q, + k, + positions, + self.cos_sin_cache.to(q.dtype), + self.mrope_section, + self.mrope_interleaved, + ) + + q = q.reshape(-1, self.q_size) + k = k.reshape(-1, self.kv_size) + + # Attention with KV cache + attn_output = self.attn(q, k, v, forward_batch) + return self.o_proj(attn_output) + + +class Qwen3VLDecoderLayer(nn.Module): + """Single decoder layer for the Qwen3-VL text model.""" + + def __init__( + self, + hidden_size: int, + num_heads: int, + num_kv_heads: int, + head_dim: int, + intermediate_size: int, + layer_id: int, + rope_theta: float = 5_000_000.0, + rms_norm_eps: float = 1e-6, + mrope_section: Tuple[int, int, int] = (24, 20, 20), + mrope_interleaved: bool = True, + max_position_embeddings: int = 32768, + ): + super().__init__() + self.self_attn = Qwen3VLAttention( + hidden_size=hidden_size, + num_heads=num_heads, + num_kv_heads=num_kv_heads, + head_dim=head_dim, + layer_id=layer_id, + rope_theta=rope_theta, + rms_norm_eps=rms_norm_eps, + mrope_section=mrope_section, + mrope_interleaved=mrope_interleaved, + max_position_embeddings=max_position_embeddings, + ) + self.mlp = MLP( + hidden_size=hidden_size, + intermediate_size=intermediate_size, + activation="silu", + use_fused_gate_up_proj=True, + use_bias_gate_up=False, + use_bias_down=False, + ) + self.input_layernorm = RMSNorm(hidden_size, eps=rms_norm_eps) + self.post_attention_layernorm = RMSNorm(hidden_size, eps=rms_norm_eps) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + forward_batch: "ForwardBatch", + deepstack_embeds: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + # Self-attention + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + hidden_states = self.self_attn(positions, hidden_states, forward_batch) + hidden_states = residual + hidden_states + + # Add deepstack embeddings after residual (matches HF ordering) + if 
deepstack_embeds is not None: + hidden_states = hidden_states + deepstack_embeds + + # MLP + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + + return hidden_states + + +class Qwen3VLTextModel(nn.Module): + """Qwen3-VL text backbone (embedding + decoder layers + final norm).""" + + def __init__( + self, + vocab_size: int = 151936, + hidden_size: int = 4096, + intermediate_size: int = 22016, + num_hidden_layers: int = 32, + num_attention_heads: int = 32, + num_key_value_heads: int = 32, + head_dim: int = 128, + rope_theta: float = 5_000_000.0, + rms_norm_eps: float = 1e-6, + mrope_section: Tuple[int, int, int] = (24, 20, 20), + mrope_interleaved: bool = True, + max_position_embeddings: int = 32768, + ): + super().__init__() + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + + self.embed_tokens = nn.Embedding(vocab_size, hidden_size) + + self.layers = nn.ModuleList( + [ + Qwen3VLDecoderLayer( + hidden_size=hidden_size, + num_heads=num_attention_heads, + num_kv_heads=num_key_value_heads, + head_dim=head_dim, + intermediate_size=intermediate_size, + layer_id=layer_id, + rope_theta=rope_theta, + rms_norm_eps=rms_norm_eps, + mrope_section=mrope_section, + mrope_interleaved=mrope_interleaved, + max_position_embeddings=max_position_embeddings, + ) + for layer_id in range(num_hidden_layers) + ] + ) + + self.norm = RMSNorm(hidden_size, eps=rms_norm_eps) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + forward_batch: "ForwardBatch", + input_embeds: Optional[torch.Tensor] = None, + input_deepstack_embeds: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + if input_embeds is None: + hidden_states = self.embed_tokens(input_ids) + else: + hidden_states = input_embeds + + for layer_idx, layer in enumerate(self.layers): + ds_embeds = _get_deepstack_embeds( + layer_idx, input_deepstack_embeds, 
self.hidden_size + ) + hidden_states = layer( + positions, + hidden_states, + forward_batch, + deepstack_embeds=ds_embeds, + ) + + return self.norm(hidden_states) + + +def _get_deepstack_embeds( + layer_idx: int, + input_deepstack_embeds: Optional[torch.Tensor], + hidden_size: int, +) -> Optional[torch.Tensor]: + """Extract deepstack embeddings for a specific decoder layer.""" + if input_deepstack_embeds is None: + return None + num_deepstack = input_deepstack_embeds.shape[-1] // hidden_size + if layer_idx >= num_deepstack: + return None + start = hidden_size * layer_idx + return input_deepstack_embeds[:, start : start + hidden_size] + + +# --------------------------------------------------------------------------- +# Full Model: Qwen3VLForConditionalGeneration +# --------------------------------------------------------------------------- + + +class Qwen3VLForConditionalGeneration(nn.Module): + """Qwen3-VL multimodal model for conditional generation. + + Combines a vision encoder and text decoder. During prefill, image/video + tokens are replaced with visual features from the vision encoder. + During decode, the model runs only the text decoder. 
+ + Forward interface:: + + logits = model.forward(input_ids, positions, forward_batch) + """ + + def __init__(self, config) -> None: + super().__init__() + self.config = config + + text_config = getattr(config, "text_config", config) + vision_config = getattr(config, "vision_config", None) + + # Vision encoder + if vision_config is not None: + self.visual = Qwen3VLVisionModel( + depth=getattr(vision_config, "depth", 27), + hidden_size=getattr(vision_config, "hidden_size", 1152), + hidden_act=getattr(vision_config, "hidden_act", "gelu_pytorch_tanh"), + intermediate_size=getattr(vision_config, "intermediate_size", 4304), + num_heads=getattr(vision_config, "num_heads", 16), + in_channels=getattr(vision_config, "in_channels", 3), + patch_size=getattr(vision_config, "patch_size", 16), + spatial_merge_size=getattr(vision_config, "spatial_merge_size", 2), + temporal_patch_size=getattr(vision_config, "temporal_patch_size", 2), + out_hidden_size=getattr(vision_config, "out_hidden_size", 3584), + num_position_embeddings=getattr( + vision_config, "num_position_embeddings", 2304 + ), + deepstack_visual_indexes=getattr( + vision_config, "deepstack_visual_indexes", [8, 16, 24] + ), + norm_eps=getattr(text_config, "rms_norm_eps", 1e-6), + ) + else: + self.visual = None + + # Text decoder + hidden_size = getattr(text_config, "hidden_size", 4096) + vocab_size = getattr(text_config, "vocab_size", 151936) + + # M-RoPE configuration -- mrope_section lives inside rope_scaling, + # NOT as a top-level attribute of text_config. 
+ rope_scaling = getattr(text_config, "rope_scaling", None) or {} + if isinstance(rope_scaling, dict): + mrope_section = rope_scaling.get("mrope_section", [24, 20, 20]) + mrope_interleaved = rope_scaling.get("mrope_interleaved", True) + else: + mrope_section = getattr(rope_scaling, "mrope_section", [24, 20, 20]) + mrope_interleaved = getattr(rope_scaling, "mrope_interleaved", True) + max_position_embeddings = getattr(text_config, "max_position_embeddings", 32768) + + self.model = Qwen3VLTextModel( + vocab_size=vocab_size, + hidden_size=hidden_size, + intermediate_size=getattr(text_config, "intermediate_size", 22016), + num_hidden_layers=getattr(text_config, "num_hidden_layers", 32), + num_attention_heads=getattr(text_config, "num_attention_heads", 32), + num_key_value_heads=getattr(text_config, "num_key_value_heads", 32), + head_dim=getattr(text_config, "head_dim", 128), + rope_theta=getattr(text_config, "rope_theta", 5_000_000.0), + rms_norm_eps=getattr(text_config, "rms_norm_eps", 1e-6), + mrope_section=tuple(mrope_section), + mrope_interleaved=bool(mrope_interleaved), + max_position_embeddings=max_position_embeddings, + ) + + # LM head — following sglang's pattern: always use lm_head.weight + # for matmul in forward(), so it works whether lm_head is nn.Embedding + # (tied) or nn.Linear (untied). 
+ tie_word_embeddings = getattr(config, "tie_word_embeddings", False) + if tie_word_embeddings: + self.lm_head = self.model.embed_tokens + else: + self.lm_head = nn.Linear(hidden_size, vocab_size, bias=False) + + # Token IDs for multimodal + self.image_token_id = getattr(config, "image_token_id", 151655) + self.video_token_id = getattr(config, "video_token_id", 151656) + self.vision_start_token_id = getattr(config, "vision_start_token_id", 151652) + + # Spatial merge size (needed for get_rope_index) + self.spatial_merge_size = ( + getattr(vision_config, "spatial_merge_size", 2) + if vision_config is not None + else 2 + ) + + # Deepstack config + if vision_config is not None: + ds_indexes = getattr(vision_config, "deepstack_visual_indexes", [8, 16, 24]) + self.num_deepstack_embeddings = len(ds_indexes) + else: + self.num_deepstack_embeddings = 0 + + self._hidden_size = hidden_size + + def get_input_embeddings(self) -> nn.Module: + return self.model.embed_tokens + + @torch.no_grad() + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + forward_batch: "ForwardBatch", + ) -> torch.Tensor: + """Run forward pass for Qwen3-VL. + + Args: + input_ids: Flattened input token IDs, shape ``[num_tokens]``. + positions: Position IDs, shape ``[num_tokens]`` (1-D, from model + runner). Overridden internally with 3-D M-RoPE positions. + forward_batch: :class:`ForwardBatch` with attention metadata. + + Returns: + Logits tensor of shape ``[num_tokens, vocab_size]``. + """ + pixel_values = getattr(forward_batch, "pixel_values", None) + image_grid_thw = getattr(forward_batch, "image_grid_thw", None) + + # ------------------------------------------------------------------ + # Build 3-D M-RoPE positions + # ------------------------------------------------------------------ + if forward_batch.forward_mode.is_extend(): + # Prefill: compute per-sequence 3-D position IDs from input_ids + # and image grids, then store per-request deltas for future decode. 
+ mrope_positions_list: List[torch.Tensor] = [] + deltas: List[int] = [] + image_idx_offset = 0 + + for i in range(forward_batch.batch_size): + start = int(forward_batch.extend_start_loc[i].item()) + length = int(forward_batch.extend_seq_lens[i].item()) + seq_ids = input_ids[start : start + length] + + # Determine how many images belong to this sequence. + num_img = int((seq_ids == self.vision_start_token_id).sum().item()) + if image_grid_thw is not None and num_img > 0: + thw_seq = image_grid_thw[ + image_idx_offset : image_idx_offset + num_img + ] + image_idx_offset += num_img + else: + thw_seq = None + + pos3d, delta = get_rope_index( + seq_ids, + thw_seq, + self.image_token_id, + self.vision_start_token_id, + self.spatial_merge_size, + ) + mrope_positions_list.append(pos3d) + deltas.append(delta) + + # Concatenate across sequences: [3, total_extend_tokens] + positions = torch.cat(mrope_positions_list, dim=1) + forward_batch.mrope_position_deltas = torch.tensor( + deltas, dtype=torch.int64, device=input_ids.device + ) + else: + # Decode: each sequence emits exactly one token. Apply the stored + # per-request delta so the position matches the image extent. 
+ stored_deltas = getattr(forward_batch, "mrope_position_deltas", None) + if stored_deltas is not None: + pos_1d = forward_batch.positions + stored_deltas + else: + pos_1d = forward_batch.positions + positions = pos_1d.unsqueeze(0).expand(3, -1) # [3, batch_size] + + input_embeds = None + input_deepstack_embeds = None + + if ( + pixel_values is not None + and image_grid_thw is not None + and self.visual is not None + and not forward_batch.forward_mode.is_decode() + ): + # Run vision encoder + vision_features = self.visual(pixel_values, grid_thw=image_grid_thw) + + # Separate main embeddings and deepstack embeddings + if self.num_deepstack_embeddings > 0: + vision_embeds = vision_features[:, : self._hidden_size] + deepstack_embeds = vision_features[:, self._hidden_size :] + else: + vision_embeds = vision_features + deepstack_embeds = None + + # Get text embeddings and replace image tokens with vision features + input_embeds = self.model.embed_tokens(input_ids) + image_mask = input_ids == self.image_token_id + if image_mask.any(): + input_embeds[image_mask] = vision_embeds.to(input_embeds.dtype) + + # Build per-token deepstack embeddings + if deepstack_embeds is not None and image_mask.any(): + input_deepstack_embeds = torch.zeros( + input_embeds.shape[0], + deepstack_embeds.shape[-1], + dtype=input_embeds.dtype, + device=input_embeds.device, + ) + input_deepstack_embeds[image_mask] = deepstack_embeds.to( + input_embeds.dtype + ) + + # Text decoder + hidden_states = self.model( + input_ids, + positions, + forward_batch, + input_embeds=input_embeds, + input_deepstack_embeds=input_deepstack_embeds, + ) + + # Prune hidden_states before lm_head to avoid a wasteful + # [total_tokens, vocab] matmul during prefill. Following sglang's + # LogitsProcessor._get_pruned_states(): in extend mode only keep + # the last token of each sequence; in decode mode all rows are + # already one-per-sequence. 
+ if forward_batch.forward_mode.is_extend(): + if ( + forward_batch.extend_start_loc is not None + and forward_batch.extend_seq_lens is not None + ): + last_index = ( + forward_batch.extend_start_loc + forward_batch.extend_seq_lens - 1 + ).long() + hidden_states = hidden_states[last_index] + else: + hidden_states = hidden_states[-1:] + + # LM head: always use weight matrix directly for the linear + # projection. Works for both nn.Embedding (tied) and nn.Linear + # (untied). Matches sglang LogitsProcessor._compute_lm_head(). + logits = torch.matmul( + hidden_states.to(self.lm_head.weight.dtype), + self.lm_head.weight.T, + ) + + # Return LogitsProcessorOutput so that ModelRunner._process_logits + # skips redundant last-token gathering. + from pymllm.executor.model_runner import LogitsProcessorOutput + + return LogitsProcessorOutput(next_token_logits=logits) + + def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]) -> None: + """Load weights from a HuggingFace checkpoint. + + Handles weight name remapping between HuggingFace Qwen3-VL + checkpoints and this model's parameter names. + """ + stacked_params_mapping = [ + # (param_name, weight_name, shard_id) + (".qkv_proj", ".q_proj", "q"), + (".qkv_proj", ".k_proj", "k"), + (".qkv_proj", ".v_proj", "v"), + (".gate_up_proj", ".up_proj", 1), + (".gate_up_proj", ".gate_proj", 0), + ] + + params_dict = dict(self.named_parameters()) + + tie_word_embeddings = getattr(self.config, "tie_word_embeddings", False) + + for name, loaded_weight in weights: + if "rotary_emb.inv_freq" in name: + continue + + # When weights are tied, lm_head.weight is the same tensor as + # embed_tokens.weight — skip the duplicate from the checkpoint. 
+ if tie_word_embeddings and "lm_head.weight" in name: + continue + + name = _remap_weight_name(name) + + # Handle language model stacked parameters (QKV, gate_up) + handled = False + for param_name, weight_name, shard_id in stacked_params_mapping: + if weight_name not in name or "visual" in name: + continue + name = name.replace(weight_name, param_name) + if name not in params_dict: + continue + _load_stacked_weight(params_dict[name], loaded_weight, shard_id) + handled = True + break + + if handled: + continue + + # Handle vision encoder QKV stacking + if "visual" in name: + for qkv_key in (".attn.q.", ".attn.k.", ".attn.v."): + if qkv_key not in name: + continue + qkv_name = name.replace(qkv_key, ".attn.qkv_proj.") + if qkv_name in params_dict: + shard = {"q": 0, "k": 1, "v": 2}[qkv_key[-2]] + _load_vision_qkv_weight( + params_dict[qkv_name], loaded_weight, shard + ) + handled = True + break + + if handled: + continue + + # Direct parameter loading + if name in params_dict: + param = params_dict[name] + if param.data.shape == loaded_weight.shape: + param.data.copy_(loaded_weight) + else: + logger.warning( + "Shape mismatch: param %s (%s) vs loaded (%s), skipping.", + name, + param.data.shape, + loaded_weight.shape, + ) + + +# --------------------------------------------------------------------------- +# Weight loading helpers +# --------------------------------------------------------------------------- + + +def _remap_weight_name(name: str) -> str: + """Remap HuggingFace weight names to pymllm parameter names.""" + # transformers >= v4.52: model.language_model.* -> model.* + if name.startswith("model.language_model."): + name = name.replace("model.language_model.", "model.", 1) + # model.visual.* -> visual.* + elif name.startswith("model.visual."): + name = name.replace("model.visual.", "visual.", 1) + + # Vision attention QKV renaming (fused weights in checkpoint) + if "visual" in name: + name = name.replace("attn.qkv.", "attn.qkv_proj.") + + return name + + 
+def _load_stacked_weight( + param: nn.Parameter, + loaded_weight: torch.Tensor, + shard_id, +) -> None: + """Load one shard (q/k/v or gate/up) into a fused parameter. + + For QKV with GQA (grouped-query attention), Q has a different size + from K and V. The fused layout is ``[Q, K, V]`` where + ``Q_size = total - 2 * KV_size``. We must use cumulative offsets + rather than ``idx * shard_size`` to handle the asymmetry correctly. + """ + if isinstance(shard_id, str): + # QKV fused layout: [Q, K, V] + # Q may have a different size from K/V (GQA). + total_size = param.data.shape[0] + shard_size = loaded_weight.shape[0] + if shard_id == "q": + param.data[0:shard_size].copy_(loaded_weight) + elif shard_id == "k": + kv_size = shard_size + q_size = total_size - 2 * kv_size + param.data[q_size : q_size + kv_size].copy_(loaded_weight) + elif shard_id == "v": + kv_size = shard_size + q_size = total_size - 2 * kv_size + param.data[q_size + kv_size : q_size + 2 * kv_size].copy_( + loaded_weight + ) + else: + # gate_up: 0 -> gate, 1 -> up (same size, idx*size is correct) + shard_size = loaded_weight.shape[0] + param.data[shard_id * shard_size : (shard_id + 1) * shard_size].copy_( + loaded_weight + ) + + +def _load_vision_qkv_weight( + param: nn.Parameter, + loaded_weight: torch.Tensor, + shard_idx: int, +) -> None: + """Load a Q, K, or V weight shard into a fused QKV parameter.""" + shard_size = param.data.shape[0] // 3 + start = shard_idx * shard_size + param.data[start : start + shard_size].copy_(loaded_weight) diff --git a/pymllm/orchestrator/async_disk_io_process.py b/pymllm/orchestrator/async_disk_io_process.py deleted file mode 100644 index ef3fd5f0..00000000 --- a/pymllm/orchestrator/async_disk_io_process.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -AsyncDiskIoProcess -- optional subprocess for asynchronous disk I/O. - -Handles weight loading, checkpoint saving, or other heavy disk operations -without blocking the scheduler or model runner. 
-""" - -import logging -from multiprocessing.connection import Connection -from typing import Any, Dict, Optional - -import zmq - -from pymllm.orchestrator.ipc_utils import create_zmq_socket - -logger = logging.getLogger(__name__) - - -class AsyncDiskIoProcess: - """Runs inside a subprocess. Performs disk I/O on behalf of the scheduler.""" - - def __init__(self, recv_addr: str): - self._recv_addr = recv_addr - - self._zmq_ctx: Optional[zmq.Context] = None - self._recv_sock: Optional[zmq.Socket] = None - - # ------------------------------------------------------------------ - # Lifecycle - # ------------------------------------------------------------------ - - def init_sockets(self) -> None: - self._zmq_ctx = zmq.Context() - self._recv_sock = create_zmq_socket( - self._zmq_ctx, zmq.PULL, self._recv_addr, bind=True, - ) - - def event_loop(self) -> None: - """Infinite loop: recv I/O request -> execute -> (optionally reply).""" - logger.info("AsyncDiskIoProcess event loop started") - while True: - io_request: Dict[str, Any] = self._recv_sock.recv_pyobj() - self._handle(io_request) - - # ------------------------------------------------------------------ - # I/O handling (placeholder) - # ------------------------------------------------------------------ - - def _handle(self, io_request: Dict[str, Any]) -> None: - """Dispatch an I/O request. - - TODO: implement weight loading, checkpoint save, etc. 
- """ - kind = io_request.get("kind") - logger.debug("AsyncDiskIoProcess received request kind=%s", kind) - - # ------------------------------------------------------------------ - # Cleanup - # ------------------------------------------------------------------ - - def shutdown(self) -> None: - if self._recv_sock is not None: - self._recv_sock.close() - if self._zmq_ctx is not None: - self._zmq_ctx.term() - - -def run_async_disk_io_process( - recv_addr: str, - pipe_writer: Connection, -) -> None: - """Entry point for ``torch.multiprocessing.Process(target=...)``.""" - proc = AsyncDiskIoProcess(recv_addr) - proc.init_sockets() - - pipe_writer.send({"status": "ready", "process": "async_disk_io"}) - pipe_writer.close() - - try: - proc.event_loop() - except KeyboardInterrupt: - pass - finally: - proc.shutdown() diff --git a/pymllm/orchestrator/detokenizer_process.py b/pymllm/orchestrator/detokenizer_process.py index e9d5184b..c2154e44 100644 --- a/pymllm/orchestrator/detokenizer_process.py +++ b/pymllm/orchestrator/detokenizer_process.py @@ -12,7 +12,7 @@ import zmq -from pymllm.orchestrator.ipc_utils import create_zmq_socket +from pymllm.orchestrator.ipc_utils import create_zmq_socket, setup_subprocess_logging logger = logging.getLogger(__name__) @@ -24,16 +24,19 @@ def __init__( self, recv_from_scheduler_addr: str, send_to_rr_addr: str, + tokenizer_cfg: Optional[Dict[str, Any]] = None, ): self._recv_from_scheduler_addr = recv_from_scheduler_addr self._send_to_rr_addr = send_to_rr_addr + self._tokenizer_cfg = tokenizer_cfg or {} self._zmq_ctx: Optional[zmq.Context] = None self._recv_from_scheduler: Optional[zmq.Socket] = None self._send_to_rr: Optional[zmq.Socket] = None - # TODO: initialise the tokenizer (needed for decode) self._tokenizer = None + # Track previous decoded text per rid for incremental (delta) output + self._rid_to_prev_text: Dict[str, str] = {} # ------------------------------------------------------------------ # Lifecycle @@ -54,32 +57,102 @@ def 
init_sockets(self) -> None: bind=False, ) + def init_tokenizer(self) -> None: + """Load the tokenizer from the configured path.""" + tokenizer_path = self._tokenizer_cfg.get("tokenizer_path") + if tokenizer_path is None: + logger.warning( + "No tokenizer_path in tokenizer_cfg; detokenization disabled" + ) + return + + from transformers import AutoTokenizer + + trust_remote_code = self._tokenizer_cfg.get("trust_remote_code", False) + self._tokenizer = AutoTokenizer.from_pretrained( + tokenizer_path, + trust_remote_code=trust_remote_code, + ) + logger.info("Detokenizer loaded tokenizer from %s", tokenizer_path) + def event_loop(self) -> None: """Infinite loop: recv token IDs -> detokenize -> send text to RR.""" logger.info("DetokenizerProcess event loop started") while True: token_id_out = self._recv_from_scheduler.recv_pyobj() - str_out = self._detokenize(token_id_out) - self._send_to_rr.send_pyobj(str_out) + results = self._detokenize(token_id_out) + for result in results: + self._send_to_rr.send_pyobj(result) # ------------------------------------------------------------------ - # Detokenization (placeholder) + # Detokenization # ------------------------------------------------------------------ - def _detokenize(self, token_id_out: Dict[str, Any]) -> Dict[str, Any]: - """Convert token IDs to text. + def _detokenize(self, token_id_out: Dict[str, Any]) -> List[Dict[str, Any]]: + """Convert token IDs to text and fan out one result per rid. - TODO: replace with real tokenizer.decode() call and incremental - detokenization logic. + The scheduler sends a batch dict with parallel lists keyed by + ``"rids"``, ``"output_ids"``, ``"finished_reasons"``, etc. + This method decodes each rid's output_ids and produces one result + dict per rid with keys ``"rid"`` (singular) and ``"finished"`` + (bool) as expected by ``RequestResponseProcess._recv_loop``. 
""" - output_ids: List[int] = token_id_out.get("output_token_ids", []) - # placeholder: join ids as string - text = "" # TODO: self._tokenizer.decode(output_ids) - return { - "rid": token_id_out.get("rid"), - "text": text, - "output_token_ids": output_ids, - } + rids: List[str] = token_id_out.get("rids", []) + output_ids: List[int] = token_id_out.get("output_ids", []) + finished_reasons: List[Optional[str]] = token_id_out.get("finished_reasons", []) + decode_ids: List[int] = token_id_out.get("decode_ids", []) + skip_special_tokens_list: List[bool] = token_id_out.get( + "skip_special_tokens", [] + ) + prompt_tokens_list: List[int] = token_id_out.get("prompt_tokens", []) + completion_tokens_list: List[int] = token_id_out.get("completion_tokens", []) + + results: List[Dict[str, Any]] = [] + + for i, rid in enumerate(rids): + finished_reason = finished_reasons[i] if i < len(finished_reasons) else None + is_finished = finished_reason is not None + skip_special = ( + skip_special_tokens_list[i] + if i < len(skip_special_tokens_list) + else True + ) + prompt_tokens = prompt_tokens_list[i] if i < len(prompt_tokens_list) else 0 + completion_tokens = ( + completion_tokens_list[i] if i < len(completion_tokens_list) else 0 + ) + + # Decode text from output_ids + if self._tokenizer is not None: + text = self._tokenizer.decode( + output_ids, + skip_special_tokens=skip_special, + ) + else: + text = "" + + # Compute incremental delta by diffing against previous text + prev_text = self._rid_to_prev_text.get(rid, "") + delta_text = text[len(prev_text):] + self._rid_to_prev_text[rid] = text + + # Clean up tracking when request finishes + if is_finished: + self._rid_to_prev_text.pop(rid, None) + + result: Dict[str, Any] = { + "rid": rid, + "text": text, + "delta": delta_text, + "output_token_ids": list(output_ids), + "finished": is_finished, + "finished_reason": finished_reason, + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + } + results.append(result) + 
+ return results # ------------------------------------------------------------------ # Cleanup @@ -98,10 +171,17 @@ def run_detokenizer_process( recv_from_scheduler_addr: str, send_to_rr_addr: str, pipe_writer: Connection, + tokenizer_cfg: Optional[Dict[str, Any]] = None, ) -> None: """Entry point for ``torch.multiprocessing.Process(target=...)``.""" - proc = DetokenizerProcess(recv_from_scheduler_addr, send_to_rr_addr) + setup_subprocess_logging((tokenizer_cfg or {}).get("log_level", "info")) + proc = DetokenizerProcess( + recv_from_scheduler_addr, + send_to_rr_addr, + tokenizer_cfg=tokenizer_cfg, + ) proc.init_sockets() + proc.init_tokenizer() pipe_writer.send({"status": "ready", "process": "detokenizer"}) pipe_writer.close() diff --git a/pymllm/orchestrator/ipc_utils.py b/pymllm/orchestrator/ipc_utils.py index faaf7a6d..b464a397 100644 --- a/pymllm/orchestrator/ipc_utils.py +++ b/pymllm/orchestrator/ipc_utils.py @@ -4,6 +4,7 @@ ZMQ sockets so that every process uses the same conventions. """ +import logging import os import tempfile from typing import Optional @@ -68,3 +69,24 @@ def close_zmq_socket(sock: zmq.Socket) -> None: sock.close() except zmq.ZMQError: pass + + +def setup_subprocess_logging(log_level: str = "info") -> None: + """Configure logging for a spawned subprocess. + + When Python spawns a subprocess (``mp.set_start_method('spawn')``), the + child starts with a blank logging configuration. Call this function at the + very beginning of every subprocess entry point so that log records are + emitted at the correct level. + + Parameters + ---------- + log_level + Case-insensitive level name, e.g. ``"debug"``, ``"info"``, ``"warning"``. 
+ """ + level = getattr(logging, log_level.upper(), logging.INFO) + logging.basicConfig( + level=level, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + logging.getLogger("pymllm").setLevel(level) diff --git a/pymllm/orchestrator/model_runner_process.py b/pymllm/orchestrator/model_runner_process.py index b60966dd..d850dd53 100644 --- a/pymllm/orchestrator/model_runner_process.py +++ b/pymllm/orchestrator/model_runner_process.py @@ -1,143 +1,968 @@ """ -ModelRunnerProcess -- subprocess that executes model forward passes. +ModelRunnerProcess -- GPU-owning component that executes model forward passes. -Receives batches from the SchedulerProcess, runs the model forward + sampling, -and returns the results (logits, next_token_ids) back to the scheduler. +Instantiated **in-process** by :class:`SchedulerProcess` (sglang-style +architecture). The scheduler calls :meth:`_forward_batch` directly — +no inter-process communication is involved. + +This component owns the GPU: it holds a :class:`ModelRunner` with model +weights, KV-cache memory pools, and the attention backend. It also owns +the :class:`RadixCache` for prefix-aware KV reuse. + +RadixCache lifecycle +-------------------- +1. **match_prefix** — called during ``_allocate_extend`` before KV allocation. +2. **inc_lock_ref** — locks matched radix-tree nodes to prevent eviction. +3. **insert (prefill)** — inserts prompt KV indices after prefill. +4. **insert (completion)** — re-inserts the full sequence when a request finishes. +5. **dec_lock_ref** — unlocks radix-tree nodes when a request is freed. +6. **evict** — called when KV allocation fails to free stale cache entries. 
""" import logging -from multiprocessing.connection import Connection -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple -import zmq +import torch -from pymllm.orchestrator.ipc_utils import create_zmq_socket +from pymllm.mem_cache.radix_cache import RadixCache, RadixKey, TreeNode logger = logging.getLogger(__name__) +# Fraction of KV pool to try evicting when allocation fails. +_EVICT_FRACTION = 0.10 +# Maximum number of eviction retries before giving up. +_MAX_EVICT_RETRIES = 3 + class ModelRunnerProcess: - """Runs inside a subprocess. Owns the model and performs forward passes.""" + """GPU-owning component created in-process by SchedulerProcess.""" def __init__( self, - recv_from_scheduler_addr: str, - send_to_scheduler_addr: str, + gpu_id: int = 0, + server_config: Optional[Any] = None, + model_config: Optional[Any] = None, ): - self._recv_from_scheduler_addr = recv_from_scheduler_addr - self._send_to_scheduler_addr = send_to_scheduler_addr + self._gpu_id = gpu_id + self._server_config = server_config + self._model_config = model_config + + # The ModelRunner instance (created in init_model) + self._runner = None + self._is_hybrid: bool = False + + # RadixCache instance (created in init_model, after memory pools) + self._radix_cache: Optional[RadixCache] = None - self._zmq_ctx: Optional[zmq.Context] = None - self._recv_from_scheduler: Optional[zmq.Socket] = None - self._send_to_scheduler: Optional[zmq.Socket] = None + # GPU resource tracking: maps rid -> req_pool_idx (slot in ReqToTokenPool) + self._rid_to_req_pool_idx: Dict[str, int] = {} + # Maps rid -> kv_indices tensor (all KV-cache token indices for this request) + self._rid_to_kv_indices: Dict[str, torch.Tensor] = {} + # Maps rid -> input_ids used for prefill (needed for radix cache insert) + self._rid_to_input_ids: Dict[str, List[int]] = {} + # Maps rid -> list of generated (decode) token ids, appended each step. 
+ # Used to build the full sequence for radix cache insert at completion. + self._rid_to_output_ids: Dict[str, List[int]] = {} + # Maps rid -> cache_protected_len: the length of the prefix that has + # already been inserted into the radix cache. When insert() returns + # prefix_len > cache_protected_len, the KV indices in the overlap + # range [cache_protected_len, prefix_len) are duplicates that must + # be freed from the allocator (the tree already holds cloned copies). + self._rid_to_cache_protected_len: Dict[str, int] = {} + # Maps rid -> (last_node, swa_boundary_id) for radix cache lock tracking + self._rid_to_radix_lock: Dict[str, Tuple[TreeNode, Optional[int]]] = {} + # Maps rid -> mrope_position_delta (M-RoPE positional offset per request) + # Populated during prefill; used to offset decode-step positions for + # multimodal models (Qwen3-VL) that consume more position indices than + # tokens due to 3-D image grid positions. + self._rid_to_mrope_delta: Dict[str, int] = {} - # TODO: initialise model, attention backend, memory pool, etc. - self._model = None + # GDN prefix cache state tracking (hybrid models only): + # Maps rid -> GDN track slot index in GDNPool (for snapshotting state) + self._rid_to_gdn_track_slot: Dict[str, int] = {} + # Maps radix tree node id -> GDN track slot index + self._node_id_to_gdn_track_slot: Dict[int, int] = {} # ------------------------------------------------------------------ # Lifecycle # ------------------------------------------------------------------ - def init_sockets(self) -> None: - self._zmq_ctx = zmq.Context() - self._recv_from_scheduler = create_zmq_socket( - self._zmq_ctx, - zmq.PULL, - self._recv_from_scheduler_addr, - bind=False, + def init_model(self) -> None: + """Create and initialise the ModelRunner and RadixCache. + + Must run inside the subprocess (after spawn) since it does CUDA init. 
+ """ + from pymllm.executor.model_runner import ModelRunner + + logger.info( + "ModelRunnerProcess: initialising ModelRunner on GPU %d", + self._gpu_id, ) - self._send_to_scheduler = create_zmq_socket( - self._zmq_ctx, - zmq.PUSH, - self._send_to_scheduler_addr, - bind=False, + self._runner = ModelRunner( + server_config=self._server_config, + model_config=self._model_config, + gpu_id=self._gpu_id, ) + self._runner.initialize() - def event_loop(self) -> None: - """Infinite loop: recv batch -> forward -> sample -> send result.""" - logger.info("ModelRunnerProcess event loop started") - while True: - batch = self._recv_from_scheduler.recv_pyobj() - result = self._forward_batch(batch) - self._send_to_scheduler.send_pyobj(result) + # Initialise RadixCache after memory pools are ready. + disable_cache = getattr(self._server_config, "disable_radix_cache", False) + self._is_hybrid = self._runner.num_gdn_layers > 0 + if self._is_hybrid and not disable_cache: + logger.info( + "ModelRunnerProcess: prefix caching ENABLED with GDN state " + "tracking (%d GDN layers)", + self._runner.num_gdn_layers, + ) + sliding_window = self._runner.sliding_window_size + page_size = getattr(self._server_config, "radix_cache_page_size", 1) + # For hybrid models, register an eviction callback so that evicted + # radix nodes free their associated GDN track slots. 
+ evict_cb = self._on_radix_node_evict if self._is_hybrid else None + self._radix_cache = RadixCache( + page_size=page_size, + sliding_window_size=sliding_window, + disable=disable_cache, + token_to_kv_pool_allocator=self._runner.token_to_kv_pool_allocator, + on_node_evict=evict_cb, + ) + logger.info( + "ModelRunnerProcess: RadixCache initialized " + "(disable=%s, sliding_window=%s)", + disable_cache, + sliding_window, + ) + logger.info("ModelRunnerProcess: ModelRunner ready") # ------------------------------------------------------------------ - # Forward pass (placeholder) + # Forward pass # ------------------------------------------------------------------ def _forward_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]: """Run the model forward pass and sampling for *batch*. - *batch* is a dict produced by ``SchedulerProcess.get_next_batch_to_run`` - whose ``"requests"`` list contains - :class:`~pymllm.engine.io_struct.TokenizedGenerateReqInput` objects. - - Returns a dict ``{"batch_id": ..., "finished": [...], "unfinished": [...]}`` - where each element of *finished* / *unfinished* is a plain output dict - containing at least ``"rid"`` and ``"output_token_ids"``. + *batch* is a dict produced by ``ScheduleBatch.to_batch_dict()`` + containing ``"forward_mode"``, ``"input_ids"``, ``"seq_lens"``, + ``"req_pool_indices"``, ``"requests"`` (metadata list), etc. - TODO: implement real forward pass, logits processing, and sampling. + Implements 6 phases: + 1. Cleanup: free GPU resources for rids no longer in the batch + 2. Prefix matching + KV allocation + 3. Build GPU tensors + 4. Forward + sample + 5. Radix cache insert (extend only) + 6. 
Build result dict """ - requests = batch.get("requests", []) - finished: List[Dict[str, Any]] = [] - unfinished: List[Dict[str, Any]] = [] + runner = self._runner + forward_mode = batch.get("forward_mode", "decode") + batch_size = batch.get("batch_size", 0) + requests_meta: List[Dict[str, Any]] = batch.get("requests", []) + + if batch_size == 0: + return {"batch_id": batch.get("batch_id"), "outputs": []} + + device = runner.device + + # Collect current batch rids + current_rids: Set[str] = {m["rid"] for m in requests_meta} + + # ============================================================== + # Phase 2: Prefix matching + KV allocation + # ============================================================== + # For extend batches, match_prefix is done inside _allocate_extend + # which may update extend_prefix_lens and extend_seq_lens. + if forward_mode == "extend": + out_cache_loc, actual_prefix_lens, actual_extend_lens = ( + self._allocate_extend(batch, requests_meta) + ) + else: + out_cache_loc = self._allocate_decode(batch, requests_meta) + actual_prefix_lens = None + actual_extend_lens = None + + # ============================================================== + # Phase 3: Build GPU tensors + # ============================================================== + if forward_mode == "extend" and actual_prefix_lens is not None: + # Rebuild input_ids and seq_lens using actual prefix matches. + # The scheduler sent tokens assuming prefix_len=0; we need to + # trim the input_ids to skip the prefix-matched tokens. 
+ ( + input_ids_tensor, + seq_lens_tensor, + extend_seq_lens_t, + extend_prefix_lens_t, + ) = self._rebuild_extend_tensors( + batch, requests_meta, actual_prefix_lens, actual_extend_lens, device + ) + else: + input_ids_list: List[int] = batch["input_ids"] + seq_lens_list: List[int] = batch["seq_lens"] + input_ids_tensor = torch.tensor( + input_ids_list, dtype=torch.int32, device=device + ) + seq_lens_tensor = torch.tensor( + seq_lens_list, dtype=torch.int32, device=device + ) + extend_seq_lens_t = None + extend_prefix_lens_t = None + + # Build req_pool_indices from our own tracking (NOT from scheduler) + req_pool_indices = torch.tensor( + [self._rid_to_req_pool_idx[m["rid"]] for m in requests_meta], + dtype=torch.int64, + device=device, + ) + + out_cache_loc = out_cache_loc.to(torch.int64) - for req in requests: - # Support both TokenizedGenerateReqInput dataclass (normal path) and - # legacy plain dicts (defensive). - rid: str = req.rid if hasattr(req, "rid") else req.get("rid") - input_ids: List[int] = ( - req.input_ids if hasattr(req, "input_ids") else req.get("input_ids", []) + # ============================================================== + # Phase 4: Forward + sample + # ============================================================== + # Extract per-request sampling params + temperatures = [] + top_ps = [] + top_ks = [] + for m in requests_meta: + sp = m.get("sampling_params") or {} + temperatures.append(sp.get("temperature", 1.0)) + top_ps.append(sp.get("top_p", 1.0)) + top_ks.append(sp.get("top_k", -1)) + + temps_tensor = torch.tensor(temperatures, dtype=torch.float32, device=device) + top_ps_tensor = torch.tensor(top_ps, dtype=torch.float32, device=device) + top_ks_tensor = torch.tensor(top_ks, dtype=torch.int32, device=device) + + if forward_mode == "extend": + if extend_seq_lens_t is None: + extend_seq_lens_list: List[int] = batch["extend_seq_lens"] + extend_prefix_lens_list: List[int] = batch["extend_prefix_lens"] + extend_seq_lens_t = torch.tensor( + 
extend_seq_lens_list, dtype=torch.int32, device=device + ) + extend_prefix_lens_t = torch.tensor( + extend_prefix_lens_list, dtype=torch.int32, device=device + ) + + fb = runner.prepare_forward_batch_extend( + input_ids=input_ids_tensor, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens_tensor, + extend_seq_lens=extend_seq_lens_t, + extend_prefix_lens=extend_prefix_lens_t, + out_cache_loc=out_cache_loc, ) - mm_inputs: Optional[Dict[str, Any]] = ( - req.mm_inputs if hasattr(req, "mm_inputs") else req.get("mm_inputs") + + # Attach multimodal vision inputs to ForwardBatch so the + # model's vision encoder can process images during prefill. + # The tokenizer wraps processor output under "image_inputs"; + # fall back to top-level keys for direct dicts. + pixel_values_list = [] + image_grid_thw_list = [] + for m in requests_meta: + mm = m.get("mm_inputs") + if mm is None: + continue + # AutoProcessor output is nested under "image_inputs" + src = mm.get("image_inputs") if "image_inputs" in mm else mm + if src is None: + continue + pv = src.get("pixel_values") if hasattr(src, "get") else getattr(src, "pixel_values", None) + thw = src.get("image_grid_thw") if hasattr(src, "get") else getattr(src, "image_grid_thw", None) + if pv is not None: + if not isinstance(pv, torch.Tensor): + pv = torch.as_tensor(pv) + pixel_values_list.append(pv.to(device=device)) + if thw is not None: + if not isinstance(thw, torch.Tensor): + thw = torch.as_tensor(thw) + image_grid_thw_list.append(thw.to(device=device)) + if pixel_values_list: + fb.pixel_values = torch.cat(pixel_values_list, dim=0) + if image_grid_thw_list: + fb.image_grid_thw = torch.cat(image_grid_thw_list, dim=0) + else: + # Build mrope_position_deltas tensor for decode batches. 
+ mrope_deltas = [ + self._rid_to_mrope_delta.get(m["rid"], 0) for m in requests_meta + ] + mrope_deltas_tensor = torch.tensor( + mrope_deltas, dtype=torch.int64, device=device ) - # TODO: actual model forward; pass input_ids and mm_inputs to the model. - next_token_ids: List[int] = [] # placeholder + fb = runner.prepare_forward_batch_decode( + input_ids=input_ids_tensor, + req_pool_indices=req_pool_indices, + seq_lens=seq_lens_tensor, + out_cache_loc=out_cache_loc, + mrope_position_deltas=mrope_deltas_tensor, + ) - output: Dict[str, Any] = { + logits_output = runner.forward(fb) + + # Persist M-RoPE position deltas for multimodal models (Qwen3-VL). + # The model sets mrope_position_deltas on the ForwardBatch during + # prefill; we store them here so decode steps can retrieve them. + if ( + forward_mode == "extend" + and getattr(fb, "mrope_position_deltas", None) is not None + ): + deltas_cpu = fb.mrope_position_deltas.cpu().tolist() + for idx, m in enumerate(requests_meta): + self._rid_to_mrope_delta[m["rid"]] = int(deltas_cpu[idx]) + + next_token_ids = runner.sample( + logits_output, + fb, + temperatures=temps_tensor, + top_ps=top_ps_tensor, + top_ks=top_ks_tensor, + ) + + # ============================================================== + # Phase 4.5: Snapshot GDN state after extend (hybrid models) + # ============================================================== + if forward_mode == "extend" and self._is_hybrid: + self._track_gdn_state_after_extend(requests_meta) + + # ============================================================== + # Phase 5: Radix cache insert (extend only) + # ============================================================== + if forward_mode == "extend" and self._radix_cache is not None: + self._insert_into_radix_cache(requests_meta) + + # ============================================================== + # Phase 6: Build result & track output tokens + # ============================================================== + next_ids_cpu = 
next_token_ids.cpu().tolist() + outputs: List[Dict[str, Any]] = [] + for i, m in enumerate(requests_meta): + rid = m["rid"] + token_id = next_ids_cpu[i] if i < len(next_ids_cpu) else 0 + # Track output tokens for radix cache insert at completion + out_ids = self._rid_to_output_ids.get(rid) + if out_ids is not None: + out_ids.append(token_id) + + out: Dict[str, Any] = { "rid": rid, - "output_token_ids": next_token_ids, - "finished": True, + "output_token_ids": [token_id], } - # TODO: check EOS / max_tokens to decide finished vs. unfinished. - finished.append(output) + # Report actual prefix_len back to the scheduler so it can + # update its token budget tracking accurately. + if actual_prefix_lens is not None: + out["prefix_len"] = actual_prefix_lens[i] + outputs.append(out) return { "batch_id": batch.get("batch_id"), - "finished": finished, - "unfinished": unfinished, + "outputs": outputs, } + # ------------------------------------------------------------------ + # Tensor rebuild for prefix-matched extend + # ------------------------------------------------------------------ + + def _rebuild_extend_tensors( + self, + batch: Dict[str, Any], + requests_meta: List[Dict[str, Any]], + actual_prefix_lens: List[int], + actual_extend_lens: List[int], + device: str, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Rebuild input_ids and related tensors after prefix matching. + + The scheduler sent input_ids assuming no prefix cache hit. After + radix cache matching, we know the actual prefix lengths and must + trim the input_ids accordingly. + + Returns (input_ids, seq_lens, extend_seq_lens, extend_prefix_lens) + as GPU tensors. + """ + # Reconstruct trimmed input_ids: for each request, take only the + # tokens beyond the matched prefix. 
+ new_input_ids: List[int] = [] + seq_lens_list: List[int] = batch["seq_lens"] + + for i, m in enumerate(requests_meta): + full_input_ids = m.get("input_ids", []) + prefix_len = actual_prefix_lens[i] + # Only send tokens after the prefix + new_input_ids.extend(full_input_ids[prefix_len:]) + + input_ids = torch.tensor(new_input_ids, dtype=torch.int32, device=device) + seq_lens = torch.tensor(seq_lens_list, dtype=torch.int32, device=device) + extend_seq_lens = torch.tensor( + actual_extend_lens, dtype=torch.int32, device=device + ) + extend_prefix_lens = torch.tensor( + actual_prefix_lens, dtype=torch.int32, device=device + ) + return input_ids, seq_lens, extend_seq_lens, extend_prefix_lens + + # ------------------------------------------------------------------ + # Radix cache insert + # ------------------------------------------------------------------ + + def _insert_into_radix_cache(self, requests_meta: List[Dict[str, Any]]) -> None: + """Insert prefill KV indices into the radix cache for future reuse. + + Mirrors sglang's ``cache_unfinished_req`` pattern: + + 1. **Insert** the request's token → KV index mapping into the tree. + 2. **Free duplicates** — indices in ``[cache_protected_len, new_prefix_len)`` + are now owned by the tree; the request's copies are redundant. + 3. **Re-match + write-back** — fetch the tree's *own* indices via + ``match_prefix`` and write them into ``req_to_token_pool``, + replacing the just-freed entries. Without this step the pool + still points at freed slots → use-after-free during decode. + 4. **Update** ``cache_protected_len`` and radix lock. 
+ """ + cache = self._radix_cache + if cache is None or cache.disable: + return + + runner = self._runner + gdn_pool = getattr(runner, "gdn_pool", None) + + for m in requests_meta: + rid = m["rid"] + input_ids = self._rid_to_input_ids.get(rid) + if input_ids is None: + continue + + slot = self._rid_to_req_pool_idx.get(rid) + if slot is None: + continue + + seq_len = len(input_ids) + kv_indices = runner.req_to_token_pool.req_to_token[slot, :seq_len].to( + torch.int64 + ) + + key = RadixKey(input_ids) + result = cache.insert(key, kv_indices) + new_prefix_len = result.prefix_len + + # --- Step 2: free duplicates --- + cache_protected_len = self._rid_to_cache_protected_len.get(rid, 0) + if new_prefix_len > cache_protected_len: + dup_indices = kv_indices[cache_protected_len:new_prefix_len] + if dup_indices.numel() > 0: + runner.token_to_kv_pool_allocator.free(dup_indices) + + # --- Step 3: re-match + write-back --- + # The tree now owns indices for [0, new_prefix_len). Fetch them + # and patch req_to_token_pool so the request reads the tree's + # (still-live) indices instead of the freed ones. + rematch = cache.match_prefix(key) + new_indices = rematch.indices + if len(new_indices) > cache_protected_len: + runner.req_to_token_pool.write( + (slot, slice(cache_protected_len, len(new_indices))), + new_indices[cache_protected_len:].to(torch.int32), + ) + + # --- Step 4: update tracking --- + self._rid_to_cache_protected_len[rid] = len(new_indices) + + # Update radix lock to cover the new (potentially deeper) node. 
+ old_lock = self._rid_to_radix_lock.pop(rid, None) + if old_lock is not None: + old_node, old_swa = old_lock + cache.dec_lock_ref(old_node, old_swa) + new_last_node = rematch.last_node + if new_last_node is not None and len(new_indices) > 0: + swa_id = cache.inc_lock_ref(new_last_node) + self._rid_to_radix_lock[rid] = (new_last_node, swa_id) + + # --- GDN track slot association (hybrid models) --- + if gdn_pool is not None and result.last_node is not None: + track_slot = self._rid_to_gdn_track_slot.get(rid) + if track_slot is not None: + node_id = result.last_node.id + old_ts = self._node_id_to_gdn_track_slot.get(node_id) + if old_ts is None: + self._node_id_to_gdn_track_slot[node_id] = track_slot + else: + gdn_pool.free_track_slot(track_slot) + self._rid_to_gdn_track_slot.pop(rid, None) + + # ------------------------------------------------------------------ + # KV allocation helpers + # ------------------------------------------------------------------ + + def _allocate_extend( + self, batch: Dict[str, Any], requests_meta: List[Dict[str, Any]] + ) -> Tuple[torch.Tensor, List[int], List[int]]: + """Allocate req pool slots and KV tokens for an extend (prefill) batch. + + Performs radix cache prefix matching before allocation: + 1. For each request, call ``match_prefix`` to find cached KV indices. + 2. Write cached indices into ``ReqToTokenPool``. + 3. Only allocate new KV tokens for the non-cached suffix. + 4. Lock matched radix nodes to prevent eviction. + + Returns ``(out_cache_loc, actual_prefix_lens, actual_extend_lens)``. + ``out_cache_loc`` has shape ``[total_new_tokens]``. 
+ """ + runner = self._runner + cache = self._radix_cache + batch_size = batch["batch_size"] + seq_lens: List[int] = batch["seq_lens"] + + # --- Step 1: Radix cache prefix matching --- + actual_prefix_lens: List[int] = [] + actual_extend_lens: List[int] = [] + matched_nodes: List[Optional[TreeNode]] = [] + # Cache the match results so we don't call match_prefix twice + cached_indices_list: List[Optional[torch.Tensor]] = [] + gdn_pool = getattr(runner, "gdn_pool", None) + + for i, m in enumerate(requests_meta): + full_input_ids: List[int] = m.get("input_ids", []) + full_seq_len = seq_lens[i] + + # Store input_ids for later radix cache insert + self._rid_to_input_ids[m["rid"]] = full_input_ids + + if cache is not None and not cache.disable and len(full_input_ids) > 0: + key = RadixKey(full_input_ids) + match_result = cache.match_prefix(key) + prefix_len = match_result.prefix_len + last_node = match_result.last_node + cached_indices = match_result.indices + else: + prefix_len = 0 + last_node = None + cached_indices = None + + # Hybrid model guard: only use a KV cache hit if the matched + # node has a GDN state snapshot. Without it, the full-attention + # layers would use cached KV while GDN layers start from zero, + # causing an attention/GDN state mismatch. Discard the hit so + # the entire prompt is processed from scratch. + if ( + gdn_pool is not None + and prefix_len > 0 + and last_node is not None + and self._node_id_to_gdn_track_slot.get(last_node.id) is None + ): + logger.debug( + "Discarding radix cache hit for rid=%s: no GDN state " + "for matched node (prefix_len=%d)", + m["rid"], prefix_len, + ) + prefix_len = 0 + last_node = None + cached_indices = None + + # Ensure at least 1 token is extended (not fully cached). + # A full cache hit (prefix_len == full_seq_len) would produce a + # 0-length input tensor that crashes CUDA kernels. Back off by 1 + # so the model always sees the last token. 
+ if prefix_len >= full_seq_len: + prefix_len = full_seq_len - 1 + if cached_indices is not None: + cached_indices = cached_indices[:prefix_len] + + extend_len = full_seq_len - prefix_len + actual_prefix_lens.append(prefix_len) + actual_extend_lens.append(extend_len) + matched_nodes.append(last_node) + cached_indices_list.append(cached_indices) + + if prefix_len > 0: + logger.info( + "Radix cache hit for rid=%s: %d/%d tokens reused (%.1f%%)", + m["rid"], + prefix_len, + full_seq_len, + 100.0 * prefix_len / full_seq_len, + ) + + total_new_tokens = sum(actual_extend_lens) + + # --- Step 2: Allocate req pool slots --- + slots = runner.req_to_token_pool.alloc(batch_size) + if slots is None: + raise RuntimeError("Failed to allocate req pool slots for extend batch") + + # --- Step 3: Allocate KV tokens (with eviction retry) --- + out_cache_loc = self._alloc_kv_with_eviction(total_new_tokens) + if out_cache_loc is None: + for s in slots: + runner.req_to_token_pool.free(s) + raise RuntimeError( + f"Failed to allocate {total_new_tokens} KV tokens for extend batch " + f"(even after eviction)" + ) + + # --- Step 4: Write indices into req_to_token_pool --- + offset = 0 + for i, m in enumerate(requests_meta): + rid = m["rid"] + slot = slots[i] + prefix_len = actual_prefix_lens[i] + extend_len = actual_extend_lens[i] + full_seq_len = seq_lens[i] + + # Write cached prefix indices (from the match result we saved) + cached_indices = cached_indices_list[i] + if cached_indices is not None and prefix_len > 0: + runner.req_to_token_pool.write( + (slot, slice(0, prefix_len)), + cached_indices[:prefix_len].to(torch.int32), + ) + + # Write new KV indices for the suffix + kv_indices = out_cache_loc[offset : offset + extend_len] + runner.req_to_token_pool.write( + (slot, slice(prefix_len, full_seq_len)), kv_indices + ) + + self._rid_to_req_pool_idx[rid] = slot + self._rid_to_kv_indices[rid] = kv_indices.clone() + self._rid_to_output_ids[rid] = [] + # The prefix portion is already protected 
in the radix cache + # (from a previous request's insert). We start with this as + # cache_protected_len so that subsequent insert() calls know + # which range is already covered. + self._rid_to_cache_protected_len[rid] = actual_prefix_lens[i] + offset += extend_len + + # GDN state management: restore from track slot on cache hit, or reset + if gdn_pool is not None: + for i, m in enumerate(requests_meta): + rid = m["rid"] + working_slot = slots[i] + prefix_len = actual_prefix_lens[i] + node = matched_nodes[i] + + if prefix_len > 0 and node is not None: + # Cache hit — try to restore GDN state from the track slot + # associated with the matched radix node. + track_slot = self._node_id_to_gdn_track_slot.get(node.id) + if track_slot is not None: + gdn_pool.copy_states(track_slot, working_slot) + logger.debug( + "GDN state restored for rid=%s from track_slot=%d " + "(prefix_len=%d)", + rid, track_slot, prefix_len, + ) + else: + # Cache hit but no GDN snapshot — reset to zero. + # This can happen if the track slot was evicted. + idx = torch.tensor( + [working_slot], dtype=torch.int64, device=runner.device + ) + gdn_pool.reset_states(idx) + logger.debug( + "GDN state reset for rid=%s (cache hit but no " + "track slot, prefix_len=%d)", + rid, prefix_len, + ) + else: + # No cache hit — fresh request, zero-init + idx = torch.tensor( + [working_slot], dtype=torch.int64, device=runner.device + ) + gdn_pool.reset_states(idx) + + # Allocate a track slot only when the radix cache is enabled; + # track slots are freed via the eviction callback so they must + # be associated with a node, which only happens when cache is on. 
+ if cache is not None and not cache.disable: + ts = gdn_pool.alloc_track_slot() + if ts is not None: + self._rid_to_gdn_track_slot[rid] = ts + + # --- Step 5: Lock matched radix nodes --- + if cache is not None and not cache.disable: + for i, m in enumerate(requests_meta): + node = matched_nodes[i] + if node is not None and actual_prefix_lens[i] > 0: + swa_boundary_id = cache.inc_lock_ref(node) + self._rid_to_radix_lock[m["rid"]] = (node, swa_boundary_id) + + return out_cache_loc, actual_prefix_lens, actual_extend_lens + + def _alloc_kv_with_eviction(self, num_tokens: int) -> Optional[torch.Tensor]: + """Try to allocate KV tokens, evicting from radix cache if needed.""" + runner = self._runner + cache = self._radix_cache + + if num_tokens == 0: + return torch.empty(0, dtype=torch.int32, device=runner.device) + + # First attempt: direct allocation + result = runner.token_to_kv_pool_allocator.alloc(num_tokens) + if result is not None: + return result + + # Eviction loop: try evicting from radix cache to free space + if cache is None or cache.disable: + return None + + for attempt in range(_MAX_EVICT_RETRIES): + evictable = cache.evictable_size() + if evictable == 0: + logger.warning( + "KV allocation failed: need %d tokens, no evictable cache entries", + num_tokens, + ) + return None + + # Evict a fraction of the cache (at least what we need) + evict_target = max( + num_tokens, + int(runner.token_to_kv_pool_allocator.size * _EVICT_FRACTION), + ) + evict_result = cache.evict(evict_target) + logger.info( + "Radix cache eviction attempt %d: evicted %d tokens (target=%d)", + attempt + 1, + evict_result.full_evicted, + evict_target, + ) + + # Retry allocation + result = runner.token_to_kv_pool_allocator.alloc(num_tokens) + if result is not None: + return result + + return None + + def _allocate_decode( + self, batch: Dict[str, Any], requests_meta: List[Dict[str, Any]] + ) -> torch.Tensor: + """Allocate 1 KV token per request for a decode step. 
+ + Returns ``out_cache_loc`` tensor of shape ``[batch_size]``. + """ + runner = self._runner + batch_size = batch["batch_size"] + seq_lens: List[int] = batch["seq_lens"] + + # Allocate 1 new KV token per request (with eviction retry) + out_cache_loc = self._alloc_kv_with_eviction(batch_size) + if out_cache_loc is None: + raise RuntimeError( + f"Failed to allocate {batch_size} KV tokens for decode batch" + ) + + # Write the new KV token index into each request's mapping + for i, m in enumerate(requests_meta): + rid = m["rid"] + slot = self._rid_to_req_pool_idx.get(rid) + if slot is None: + logger.warning("Decode step for unknown rid=%s, skipping KV write", rid) + continue + + cur_seq_len = seq_lens[i] + kv_new = out_cache_loc[i : i + 1] + # The scheduler increments req.seq_len by 1 after every step, so + # seq_lens[i] == (number of tokens in the KV cache INCLUDING the + # token being decoded now). The new token's slot must therefore be + # written at index seq_lens[i] - 1, matching the position used by + # prepare_forward_batch_decode (positions = seq_lens - 1) and the + # window FlashInfer reads (req_to_token_pool[slot, 0:seq_lens[i]]). + write_pos = cur_seq_len - 1 + runner.req_to_token_pool.write( + (slot, slice(write_pos, write_pos + 1)), kv_new + ) + + # Append to tracked kv_indices + prev = self._rid_to_kv_indices.get(rid) + if prev is not None: + self._rid_to_kv_indices[rid] = torch.cat([prev, kv_new]) + else: + self._rid_to_kv_indices[rid] = kv_new.clone() + + return out_cache_loc + + # ------------------------------------------------------------------ + # Resource cleanup + # ------------------------------------------------------------------ + + def _free_rid_resources(self, rid: str) -> None: + """Free GPU resources (req pool slot + KV indices) for a finished rid. 
+ + KV index ownership model (when radix cache is enabled): + + ``req_to_token_pool[slot]`` contains three regions after + ``insert()`` returns ``new_prefix_len``:: + + [0, cache_protected_len) + Indices shared with the radix tree from a previous insert. + **Do not free** — the tree already owns them. + + [cache_protected_len, new_prefix_len) + Indices allocated by THIS request that turned out to overlap + with tree nodes inserted concurrently. The tree already + holds cloned copies → these are duplicates → **free them**. + + [new_prefix_len, total_len) + Indices that ``insert()`` just added to the tree (cloned). + The tree now owns the underlying KV pool slots. + **Do not free** — the tree will free during eviction. + + When the radix cache is disabled, all KV indices are freed directly. + """ + runner = self._runner + cache = self._radix_cache + + slot = self._rid_to_req_pool_idx.pop(rid, None) + kv_indices = self._rid_to_kv_indices.pop(rid, None) + input_ids = self._rid_to_input_ids.pop(rid, None) + output_ids = self._rid_to_output_ids.pop(rid, None) + cache_protected_len = self._rid_to_cache_protected_len.pop(rid, 0) + radix_lock = self._rid_to_radix_lock.pop(rid, None) + self._rid_to_mrope_delta.pop(rid, None) + + # Free GDN track slot (if any) — the slot's association with a + # radix node is managed separately via _node_id_to_gdn_track_slot + # and the eviction callback; here we just remove the rid mapping. + self._rid_to_gdn_track_slot.pop(rid, None) + + cache_enabled = cache is not None and not cache.disable + + # ---------------------------------------------------------- + # Phase 1: Read all KV indices BEFORE freeing anything. 
+ # ---------------------------------------------------------- + prompt_len = len(input_ids) if input_ids is not None else 0 + decode_len = len(output_ids) if output_ids else 0 + total_len = prompt_len + decode_len + + all_kv_indices: Optional[torch.Tensor] = None + if slot is not None and input_ids is not None: + all_kv_indices = runner.req_to_token_pool.req_to_token[slot, :total_len].to( + torch.int64 + ) + + # ---------------------------------------------------------- + # Phase 2: Insert into radix cache (if enabled). + # ---------------------------------------------------------- + did_insert = False + if cache_enabled and all_kv_indices is not None: + if self._is_hybrid and decode_len > 0: + # Hybrid model: insert only prompt tokens (not decode) + # because GDN state is only tracked at the prompt boundary. + prompt_kv = all_kv_indices[:prompt_len] + decode_kv = all_kv_indices[prompt_len:] + key = RadixKey(list(input_ids)) + result = cache.insert(key, prompt_kv) + new_prefix_len = result.prefix_len + + # Free duplicate KV indices in the overlap region. + if new_prefix_len > cache_protected_len: + dup_indices = prompt_kv[cache_protected_len:new_prefix_len] + if dup_indices.numel() > 0: + runner.token_to_kv_pool_allocator.free(dup_indices) + + # Free decode KV indices (tree does not own them) + if decode_kv.numel() > 0: + runner.token_to_kv_pool_allocator.free(decode_kv) + else: + # Non-hybrid or no decode tokens: insert full sequence + full_token_ids = list(input_ids) + if output_ids: + full_token_ids.extend(output_ids) + key = RadixKey(full_token_ids) + result = cache.insert(key, all_kv_indices) + new_prefix_len = result.prefix_len + + # Free duplicate KV indices in the overlap region. 
+ if new_prefix_len > cache_protected_len: + dup_indices = all_kv_indices[cache_protected_len:new_prefix_len] + if dup_indices.numel() > 0: + runner.token_to_kv_pool_allocator.free(dup_indices) + + did_insert = True + + # ---------------------------------------------------------- + # Phase 3: Unlock radix cache nodes. + # ---------------------------------------------------------- + if cache_enabled and radix_lock is not None: + node, swa_boundary_id = radix_lock + cache.dec_lock_ref(node, swa_boundary_id) + + # ---------------------------------------------------------- + # Phase 4: Free KV indices not owned by the radix cache. + # ---------------------------------------------------------- + if not did_insert: + if cache_enabled and all_kv_indices is not None: + # Cache enabled but insert skipped (shouldn't happen in + # normal flow). Tree owns [0, cache_protected_len); + # free the rest. + tail = all_kv_indices[cache_protected_len:] + if tail.numel() > 0: + runner.token_to_kv_pool_allocator.free(tail) + elif not cache_enabled: + # Cache disabled — free all newly-allocated KV indices. + if all_kv_indices is not None and all_kv_indices.numel() > 0: + runner.token_to_kv_pool_allocator.free(all_kv_indices) + elif kv_indices is not None and kv_indices.numel() > 0: + runner.token_to_kv_pool_allocator.free(kv_indices) + + # ---------------------------------------------------------- + # Phase 5: Free the req pool slot. 
+ # ---------------------------------------------------------- + if slot is not None: + runner.req_to_token_pool.free(slot) + + logger.debug( + "Freed resources for rid=%s (slot=%s, kv_tokens=%d)", + rid, + slot, + kv_indices.numel() if kv_indices is not None else 0, + ) + + # ------------------------------------------------------------------ + # GDN state tracking helpers (hybrid models) + # ------------------------------------------------------------------ + + def _track_gdn_state_after_extend( + self, requests_meta: List[Dict[str, Any]] + ) -> None: + """Snapshot working GDN state into each request's track slot. + + Called immediately after ``runner.forward()`` for extend batches so + that the FINAL recurrent/conv state (after processing the full prompt) + is saved. The track slot is later associated with a radix node in + ``_insert_into_radix_cache``. + """ + gdn_pool = getattr(self._runner, "gdn_pool", None) + if gdn_pool is None: + return + + for m in requests_meta: + rid = m["rid"] + working_slot = self._rid_to_req_pool_idx.get(rid) + track_slot = self._rid_to_gdn_track_slot.get(rid) + if working_slot is not None and track_slot is not None: + gdn_pool.copy_states(working_slot, track_slot) + + def _on_radix_node_evict(self, node_id: int) -> None: + """Callback invoked by RadixCache when a node is evicted. + + Frees the GDN track slot associated with the evicted node. 
+ """ + track_slot = self._node_id_to_gdn_track_slot.pop(node_id, None) + if track_slot is not None: + gdn_pool = getattr(self._runner, "gdn_pool", None) + if gdn_pool is not None: + gdn_pool.free_track_slot(track_slot) + logger.debug( + "Freed GDN track slot %d for evicted node %d", + track_slot, node_id, + ) + # ------------------------------------------------------------------ # Cleanup # ------------------------------------------------------------------ def shutdown(self) -> None: - if self._recv_from_scheduler is not None: - self._recv_from_scheduler.close() - if self._send_to_scheduler is not None: - self._send_to_scheduler.close() - if self._zmq_ctx is not None: - self._zmq_ctx.term() - - -def run_model_runner_process( - recv_from_scheduler_addr: str, - send_to_scheduler_addr: str, - pipe_writer: Connection, -) -> None: - """Entry point for ``torch.multiprocessing.Process(target=...)``.""" - proc = ModelRunnerProcess(recv_from_scheduler_addr, send_to_scheduler_addr) - proc.init_sockets() - - pipe_writer.send({"status": "ready", "process": "model_runner"}) - pipe_writer.close() - - try: - proc.event_loop() - except KeyboardInterrupt: - pass - finally: - proc.shutdown() + if self._runner is not None: + self._runner.shutdown() diff --git a/pymllm/orchestrator/request_response_process.py b/pymllm/orchestrator/request_response_process.py index fa9d92ec..5c72a14c 100644 --- a/pymllm/orchestrator/request_response_process.py +++ b/pymllm/orchestrator/request_response_process.py @@ -65,8 +65,12 @@ def __init__( self._loop_task: Optional[asyncio.Task] = None - def start(self, loop: asyncio.AbstractEventLoop) -> None: - """Kick off the background send/recv tasks on *loop*.""" + def start(self) -> None: + """Bind ZMQ sockets. Background tasks are started lazily by + :meth:`listen` on the first :meth:`add_request` call, so they + always run on the correct event loop regardless of whether the + caller is uvicorn, ``loop.run_until_complete``, or anything else. 
+ """ self._zmq_ctx = zmq.asyncio.Context() self._send_to_tokenizer = create_zmq_socket( self._zmq_ctx, @@ -80,7 +84,20 @@ def start(self, loop: asyncio.AbstractEventLoop) -> None: self._recv_from_detokenizer_addr, bind=True, ) + + def listen(self) -> None: + """Start the send/recv background tasks on the **current** running + event loop. Idempotent — subsequent calls are no-ops while the + tasks are still alive. + + Called automatically by :meth:`add_request`, so callers never need + to invoke this directly. + """ + if self._loop_task is not None and not self._loop_task.done(): + return + loop = asyncio.get_running_loop() self._loop_task = loop.create_task(self._run()) + logger.debug("RequestResponseProcess: background tasks started") async def add_request( self, request: GenerateReqInput @@ -98,6 +115,8 @@ async def add_request( Callers should ``await state.event.wait()`` in a loop, consuming ``state.out_list`` entries until ``state.finished`` is ``True``. """ + self.listen() + if request.is_single: rid = request.rid if isinstance(request.rid, str) else request.rid[0] state = ReqState() diff --git a/pymllm/orchestrator/scheduler_process.py b/pymllm/orchestrator/scheduler_process.py index 8f2d9a95..8594a899 100644 --- a/pymllm/orchestrator/scheduler_process.py +++ b/pymllm/orchestrator/scheduler_process.py @@ -1,27 +1,28 @@ """ -SchedulerProcess -- the central scheduling hub. +SchedulerProcess -- the central scheduling and inference hub. Receives tokenized requests from the TokenizerProcess, organises them into -batches, dispatches batches to the ModelRunnerProcess for forward passes, -collects results, and streams finished token IDs to the DetokenizerProcess. +batches, runs model forward passes via the **in-process** model runner +(sglang-style), and streams finished token IDs to the DetokenizerProcess. -Supports two modes: - 1. Legacy ZMQ path: Receive TokenizedGenerateReqInput via ZMQ recv_pyobj - 2. 
Shared queue fast path: Read rid from shared queue and metadata from shared memory +Architecture: the scheduler owns the :class:`ModelRunnerProcess` directly +(same process, direct function calls). GPU resources (KV cache, req pool +slots) are freed immediately when requests finish — no cross-process +communication needed. -When the shared queue fast path is active the scheduler also handles CUDA IPC -tensor reconstruction via -:func:`~pymllm.orchestrator.cuda_ipc_transport.unwrap_mm_inputs_from_ipc`. +Request ingestion supports two modes: + 1. ZMQ path: Receive TokenizedGenerateReqInput via ZMQ recv_pyobj + 2. Shared queue fast path: Read from shared memory + multiprocessing queue -The main ``event_loop`` scheduler flow:: +The main ``event_loop``:: while True: recv_requests() process_input_requests() - batch = get_next_batch_to_run() + batch = get_next_batch_to_run() # also frees finished GPU resources if batch: - run_batch(batch) - process_batch_result(batch) + result = run_batch(batch) # direct call to model runner + process_batch_result(batch, result) stream_output() """ @@ -34,16 +35,297 @@ import zmq -from pymllm.engine.io_struct import TokenizedGenerateReqInput +from pymllm.engine.forward_batch import ForwardMode +from pymllm.engine.io_struct import BatchTokenIDOutput, TokenizedGenerateReqInput from pymllm.orchestrator.cuda_ipc_transport import ( TensorTransportMode, unwrap_mm_inputs_from_ipc, ) -from pymllm.orchestrator.ipc_utils import create_zmq_socket +from pymllm.orchestrator.ipc_utils import create_zmq_socket, setup_subprocess_logging from pymllm.orchestrator.shared_memory_queue import SharedMemoryManager, TensorQueue logger = logging.getLogger(__name__) +# Default scheduling limits +_DEFAULT_MAX_RUNNING_REQUESTS = 256 +_DEFAULT_MAX_PREFILL_TOKENS = 8192 +_DEFAULT_MAX_TOTAL_TOKENS = 131072 +_DEFAULT_MAX_NEW_TOKENS = 32768 + + +# ====================================================================== +# Req -- per-request state tracker +# 
====================================================================== + + +class Req: + """Tracks a single request through its lifecycle (prefill -> decode -> finish). + + Created by :meth:`SchedulerProcess.process_input_requests` from a + :class:`~pymllm.engine.io_struct.TokenizedGenerateReqInput`. + """ + + __slots__ = ( + "rid", + "input_ids", + "input_text", + "sampling_params", + "mm_inputs", + "stream", + "return_logprob", + "logprob_start_len", + "top_logprobs_num", + # KV-cache state + "req_pool_idx", + "seq_len", + # Prefix-cache hit (set during scheduling when radix cache is active) + "prefix_len", + # Generation state + "output_ids", + "finished_reason", + "is_prefilled", + # Sampling parameters (parsed) + "max_new_tokens", + "temperature", + "top_p", + "top_k", + "stop_token_ids", + # Streaming + "read_offset", + # Prompt length (for token accounting) + "prompt_len", + ) + + def __init__( + self, + rid: str, + input_ids: List[int], + input_text: str = "", + sampling_params: Optional[Dict[str, Any]] = None, + mm_inputs: Optional[Dict[str, Any]] = None, + stream: bool = False, + return_logprob: bool = False, + logprob_start_len: int = -1, + top_logprobs_num: int = 0, + ): + self.rid = rid + self.input_ids = list(input_ids) + self.input_text = input_text + self.mm_inputs = mm_inputs + self.stream = stream + self.return_logprob = return_logprob + self.logprob_start_len = logprob_start_len + self.top_logprobs_num = top_logprobs_num + + # Parse sampling params + sp = sampling_params or {} + self.sampling_params = sp + self.max_new_tokens: int = sp.get("max_new_tokens", _DEFAULT_MAX_NEW_TOKENS) + self.temperature: float = sp.get("temperature", 1.0) + self.top_p: float = sp.get("top_p", 1.0) + self.top_k: int = sp.get("top_k", -1) + self.stop_token_ids: List[int] = list(sp.get("stop_token_ids", [])) + + # KV-cache state (assigned during scheduling) + self.req_pool_idx: int = -1 + self.seq_len: int = len(input_ids) + # Number of prefix tokens served from the 
radix/KV cache (0 = no hit). + # Updated by process_batch_result when the model runner reports a + # prefix cache hit. Used in _free_req_resources to correctly + # release the token budget. + self.prefix_len: int = 0 + + # Generation state + self.output_ids: List[int] = [] + self.finished_reason: Optional[str] = None + self.is_prefilled: bool = False + + # Streaming + self.read_offset: int = 0 + + # Prompt length + self.prompt_len: int = len(input_ids) + + def check_finished(self, eos_token_id: Optional[int] = None) -> bool: + """Check if this request has reached a finish condition. + + Sets ``finished_reason`` and returns True if finished. + Checks: + 1. EOS token in the latest generated token + 2. ``max_new_tokens`` reached + """ + if self.finished_reason is not None: + return True + + if self.output_ids: + last_token = self.output_ids[-1] + # Check model EOS token + if eos_token_id is not None and last_token == eos_token_id: + self.finished_reason = "eos" + return True + # Check stop token IDs from sampling params + if last_token in self.stop_token_ids: + self.finished_reason = "eos" + return True + + # Check max_new_tokens + if len(self.output_ids) >= self.max_new_tokens: + self.finished_reason = "length" + return True + + return False + + @property + def is_finished(self) -> bool: + return self.finished_reason is not None + + def abort(self) -> None: + """Mark this request as aborted.""" + self.finished_reason = "abort" + + def __repr__(self) -> str: + return ( + f"Req(rid={self.rid!r}, seq_len={self.seq_len}, " + f"out={len(self.output_ids)}, finished={self.finished_reason})" + ) + + +# ====================================================================== +# ScheduleBatch -- batch container +# ====================================================================== + + +class ScheduleBatch: + """Wraps a list of :class:`Req` objects for a single forward pass. 
+ + Provides helpers to assemble the batch dict sent to the ModelRunnerProcess + in the format expected by :class:`~pymllm.engine.forward_batch.ForwardBatch`. + """ + + def __init__(self, reqs: List[Req], forward_mode: ForwardMode): + self.reqs = reqs + self.forward_mode = forward_mode + + @property + def batch_size(self) -> int: + return len(self.reqs) + + def prepare_for_extend(self) -> Dict[str, Any]: + """Assemble a batch dict for prefill / extend forward pass. + + Returns a dict with flattened ``input_ids``, per-request ``positions``, + ``req_pool_indices``, ``seq_lens``, ``extend_seq_lens``, + ``extend_prefix_lens``, and request metadata. + + Note: The scheduler sends the **full** input_ids (no prefix trimming). + The ModelRunnerProcess performs radix cache prefix matching and + rebuilds the tensors with actual prefix lengths before the forward + pass. The ``extend_prefix_lens`` here are always 0 from the + scheduler; they serve as placeholders. + """ + all_input_ids: List[int] = [] + all_positions: List[int] = [] + req_pool_indices: List[int] = [] + seq_lens: List[int] = [] + extend_seq_lens: List[int] = [] + extend_prefix_lens: List[int] = [] + requests_meta: List[Dict[str, Any]] = [] + + for req in self.reqs: + input_len = len(req.input_ids) + + # Send full input_ids; model runner will trim based on prefix + all_input_ids.extend(req.input_ids) + all_positions.extend(range(input_len)) + req_pool_indices.append(req.req_pool_idx) + seq_lens.append(req.seq_len) + extend_seq_lens.append(input_len) + extend_prefix_lens.append(0) + requests_meta.append( + { + "rid": req.rid, + "input_ids": req.input_ids, + "mm_inputs": req.mm_inputs, + "sampling_params": req.sampling_params, + "return_logprob": req.return_logprob, + "logprob_start_len": req.logprob_start_len, + "top_logprobs_num": req.top_logprobs_num, + } + ) + + return { + "forward_mode": "extend", + "batch_size": self.batch_size, + "input_ids": all_input_ids, + "positions": all_positions, + "req_pool_indices": 
req_pool_indices, + "seq_lens": seq_lens, + "extend_seq_lens": extend_seq_lens, + "extend_prefix_lens": extend_prefix_lens, + "requests": requests_meta, + "batch_id": id(self), + "created_at": time.time(), + } + + def prepare_for_decode(self) -> Dict[str, Any]: + """Assemble a batch dict for decode forward pass (one token per request). + + Returns a dict with one input token per request (the last generated + token), positions at ``seq_len``, and request metadata. + """ + all_input_ids: List[int] = [] + all_positions: List[int] = [] + req_pool_indices: List[int] = [] + seq_lens: List[int] = [] + requests_meta: List[Dict[str, Any]] = [] + + for req in self.reqs: + # For decode, the input is the last generated token + if req.output_ids: + all_input_ids.append(req.output_ids[-1]) + else: + # Fallback: last input token (shouldn't happen normally) + all_input_ids.append(req.input_ids[-1]) + all_positions.append(req.seq_len) + req_pool_indices.append(req.req_pool_idx) + seq_lens.append(req.seq_len) + requests_meta.append( + { + "rid": req.rid, + "sampling_params": req.sampling_params, + "return_logprob": req.return_logprob, + "logprob_start_len": req.logprob_start_len, + "top_logprobs_num": req.top_logprobs_num, + } + ) + + return { + "forward_mode": "decode", + "batch_size": self.batch_size, + "input_ids": all_input_ids, + "positions": all_positions, + "req_pool_indices": req_pool_indices, + "seq_lens": seq_lens, + "requests": requests_meta, + "batch_id": id(self), + "created_at": time.time(), + } + + def to_batch_dict(self) -> Dict[str, Any]: + """Build the batch dict appropriate for the current forward mode.""" + if self.forward_mode.is_extend(): + return self.prepare_for_extend() + else: + return self.prepare_for_decode() + + def __repr__(self) -> str: + return f"ScheduleBatch(mode={self.forward_mode.name}, size={self.batch_size})" + + +# ====================================================================== +# SchedulerProcess +# 
====================================================================== + class SchedulerProcess: """Runs inside a subprocess. Central hub that drives the inference loop.""" @@ -51,19 +333,29 @@ class SchedulerProcess: def __init__( self, recv_from_tokenizer_addr: str, - send_to_model_runner_addr: str, - recv_from_model_runner_addr: str, send_to_detokenizer_addr: str, + server_config: Optional[Any] = None, + model_config: Optional[Any] = None, + gpu_id: int = 0, shared_queue: Optional[TensorQueue] = None, enable_shared_queue: bool = False, tensor_transport_mode: TensorTransportMode = "default", + # Scheduling limits + max_running_requests: int = _DEFAULT_MAX_RUNNING_REQUESTS, + max_prefill_tokens: int = _DEFAULT_MAX_PREFILL_TOKENS, + max_total_tokens: int = _DEFAULT_MAX_TOTAL_TOKENS, + eos_token_ids: Optional[List[int]] = None, + default_max_new_tokens: int = _DEFAULT_MAX_NEW_TOKENS, ): - # ZMQ addresses + # ZMQ addresses (tokenizer + detokenizer only) self._recv_from_tokenizer_addr = recv_from_tokenizer_addr - self._send_to_model_runner_addr = send_to_model_runner_addr - self._recv_from_model_runner_addr = recv_from_model_runner_addr self._send_to_detokenizer_addr = send_to_detokenizer_addr + # Model config (for in-process model runner, sglang-style) + self._server_config = server_config + self._model_config = model_config + self._gpu_id = gpu_id + # Shared queue configuration self._shared_queue = shared_queue self._enable_shared_queue = enable_shared_queue @@ -72,16 +364,53 @@ def __init__( # ZMQ runtime objects (initialised in init_sockets) self._zmq_ctx: Optional[zmq.Context] = None self._recv_from_tokenizer: Optional[zmq.Socket] = None - self._send_to_model_runner: Optional[zmq.Socket] = None - self._recv_from_model_runner: Optional[zmq.Socket] = None self._send_to_detokenizer: Optional[zmq.Socket] = None self._poller: Optional[zmq.Poller] = None - # Request management + # In-process model runner (initialised in init_model) + self._model_runner = None + + # 
Request management -- three-stage pipeline self._waiting_queue: Deque[TokenizedGenerateReqInput] = deque() - self._running_batch: Optional[Dict[str, Any]] = None + self._pending_queue: List[Req] = [] + self._running_batch: List[Req] = [] self._finished: List[Dict[str, Any]] = [] + # Scheduling limits + self._max_running_requests = max_running_requests + self._max_prefill_tokens = max_prefill_tokens + + # KV-cache token budget (simplified single-GPU tracking). + self._max_total_tokens = max_total_tokens + self._used_tokens: int = 0 + + # EOS token(s) for finish detection + self._eos_token_ids: List[int] = list(eos_token_ids) if eos_token_ids else [] + + # Default max_new_tokens (from model config or fallback) + self._default_max_new_tokens = default_max_new_tokens + + # Monotonic request-slot counter (simplified; no GPU pool access) + self._next_req_pool_idx: int = 0 + + # ------ Throughput metrics (sglang-style interval logging) ------ + # How often (in decode batches) to log throughput stats. 
+ self._decode_log_interval: int = ( + server_config.decode_log_interval + if server_config is not None and hasattr(server_config, "decode_log_interval") + else 40 + ) + # Accumulators reset at each log interval + self._num_prefill_tokens: int = 0 # new prefill tokens (excluding cache hits) + self._num_prefill_cache_tokens: int = 0 # prefill tokens served from cache + self._num_decode_tokens: int = 0 # generated decode tokens + self._num_prefill_reqs: int = 0 # prefill requests count + # Timestamps for throughput calculation + self._last_prefill_stats_tic: float = time.time() + self._last_decode_stats_tic: float = time.time() + # Forward pass counters + self._forward_ct_decode: int = 0 + # ------------------------------------------------------------------ # Lifecycle # ------------------------------------------------------------------ @@ -95,18 +424,6 @@ def init_sockets(self) -> None: self._recv_from_tokenizer_addr, bind=False, ) - self._send_to_model_runner = create_zmq_socket( - self._zmq_ctx, - zmq.PUSH, - self._send_to_model_runner_addr, - bind=True, - ) - self._recv_from_model_runner = create_zmq_socket( - self._zmq_ctx, - zmq.PULL, - self._recv_from_model_runner_addr, - bind=True, - ) self._send_to_detokenizer = create_zmq_socket( self._zmq_ctx, zmq.PUSH, @@ -118,6 +435,22 @@ def init_sockets(self) -> None: self._poller = zmq.Poller() self._poller.register(self._recv_from_tokenizer, zmq.POLLIN) + def init_model(self) -> None: + """Create and initialise the in-process model runner (sglang-style). + + Must be called after ``init_sockets`` and inside the subprocess + (after spawn) since it performs CUDA initialisation. 
+ """ + from pymllm.orchestrator.model_runner_process import ModelRunnerProcess + + self._model_runner = ModelRunnerProcess( + gpu_id=self._gpu_id, + server_config=self._server_config, + model_config=self._model_config, + ) + self._model_runner.init_model() + logger.info("In-process model runner initialised on GPU %d", self._gpu_id) + def event_loop(self) -> None: """Infinite scheduling loop.""" logger.info( @@ -170,6 +503,8 @@ def _recv_from_zmq(self) -> None: self._waiting_queue = type(self._waiting_queue)( r for r in self._waiting_queue if r.rid != rid ) + # Also abort from pending queue + self._abort_request(rid) else: self._waiting_queue.append(msg) @@ -236,90 +571,398 @@ def _recv_from_shared_queue(self) -> None: # ------------------------------------------------------------------ def process_input_requests(self) -> None: - """Pre-process and validate requests sitting in ``_waiting_queue``. - - TODO: attach sampling params, allocate KV-cache slots, etc. + """Convert raw :class:`TokenizedGenerateReqInput` in ``_waiting_queue`` + into :class:`Req` objects and move them to ``_pending_queue``. + + For each request: + 1. Parse sampling params (max_new_tokens, temperature, top_p, top_k, + stop_token_ids with defaults from EOS token). + 2. Create a ``Req`` object. + 3. Move from ``_waiting_queue`` to ``_pending_queue``. 
"""
-        pass
+        while self._waiting_queue:
+            raw = self._waiting_queue.popleft()
+
+            # Merge EOS token into stop_token_ids if not already present
+            sp = dict(raw.sampling_params) if raw.sampling_params else {}
+            # Inject model-aware default for max_new_tokens when not provided
+            if "max_new_tokens" not in sp:
+                sp["max_new_tokens"] = self._default_max_new_tokens
+            stop_ids = list(sp.get("stop_token_ids", []))
+            for eid in self._eos_token_ids:
+                if eid not in stop_ids:
+                    stop_ids.append(eid)
+            sp["stop_token_ids"] = stop_ids
+
+            req = Req(
+                rid=raw.rid,
+                input_ids=raw.input_ids,
+                input_text=raw.input_text,
+                sampling_params=sp,
+                mm_inputs=raw.mm_inputs,
+                stream=raw.stream,
+                return_logprob=raw.return_logprob,
+                logprob_start_len=raw.logprob_start_len,
+                top_logprobs_num=raw.top_logprobs_num,
+            )
+            self._pending_queue.append(req)
+            logger.debug("Processed input request %s (len=%d)", req.rid, req.seq_len)
 
     # ------------------------------------------------------------------
     # Step 3: build the next batch
     # ------------------------------------------------------------------
 
-    def get_next_batch_to_run(self) -> Optional[Dict[str, Any]]:
-        """Select requests from ``_waiting_queue`` and form a batch.
-
-        TODO: implement real batching / scheduling policy.
+    def get_next_batch_to_run(self) -> Optional[ScheduleBatch]:
+        """Implements continuous batching with three phases.
+
+        1. **Filter finished**: Remove finished requests from
+           ``_running_batch`` and free their token budget.
+        2. **Schedule new prefills**: From ``_pending_queue``, admit
+           requests that fit within the token budget and
+           ``max_running_requests``.
+        3. **Build batch**:
+           - If new prefill requests exist -> EXTEND batch
+           - Else if running decode requests exist -> DECODE batch
+           - Else -> None (idle)
+
+        Note on prefix cache: The actual prefix matching is done by the
+        ModelRunnerProcess (which owns the RadixCache). The scheduler
+        uses ``input_len`` as a conservative budget estimate. 
The model + runner reports back actual ``prefix_len`` in results, and the + scheduler adjusts ``_used_tokens`` accordingly in + ``process_batch_result``. """ - if not self._waiting_queue: - return None + # Phase 1: filter finished requests from running batch + still_running: List[Req] = [] + for req in self._running_batch: + if req.is_finished: + self._model_runner._free_rid_resources(req.rid) + self._free_req_resources(req) + else: + still_running.append(req) + self._running_batch = still_running + + # Phase 2: schedule new prefill requests from pending queue + new_prefill: List[Req] = [] + remaining_pending: List[Req] = [] + prefill_token_budget = self._max_prefill_tokens + + for req in self._pending_queue: + input_len = len(req.input_ids) + total_running = len(self._running_batch) + len(new_prefill) + + # Check capacity constraints. + # We reserve the full input_len as KV budget (conservative). + # If the model runner finds a prefix cache hit, some tokens + # won't need new KV allocation; the budget is corrected in + # process_batch_result. + can_fit_request = total_running < self._max_running_requests + can_fit_tokens = (self._used_tokens + input_len) <= self._max_total_tokens + can_fit_prefill = input_len <= prefill_token_budget + + if can_fit_request and can_fit_tokens and can_fit_prefill: + # Allocate req pool slot + req.req_pool_idx = self._next_req_pool_idx + self._next_req_pool_idx += 1 + # Reserve token budget (full input_len as conservative estimate) + self._used_tokens += input_len + prefill_token_budget -= input_len + new_prefill.append(req) + logger.debug( + "Scheduled prefill for %s (len=%d, used=%d/%d)", + req.rid, + input_len, + self._used_tokens, + self._max_total_tokens, + ) + else: + remaining_pending.append(req) - batch_requests: List[Dict[str, Any]] = [] - # TODO: respect max_running_requests, memory budget, etc. 
- while self._waiting_queue: - batch_requests.append(self._waiting_queue.popleft()) + self._pending_queue = remaining_pending - batch = { - "requests": batch_requests, - "batch_id": id(batch_requests), - "created_at": time.time(), - } - return batch + # Phase 3: build batch + if new_prefill: + return ScheduleBatch(new_prefill, ForwardMode.EXTEND) + elif self._running_batch: + return ScheduleBatch(self._running_batch, ForwardMode.DECODE) + else: + return None # ------------------------------------------------------------------ # Step 4: run the batch via ModelRunnerProcess # ------------------------------------------------------------------ - def run_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]: - """Send *batch* to ModelRunnerProcess and wait for the result. + def run_batch(self, batch: ScheduleBatch) -> Dict[str, Any]: + """Execute the batch via the in-process model runner (sglang-style). - This is a **blocking** call: the scheduler is synchronous with the - model runner for simplicity. Overlap scheduling can be added later. + Direct function call — no ZMQ serialisation overhead. """ - self._send_to_model_runner.send_pyobj(batch) - result = self._recv_from_model_runner.recv_pyobj() - return result + batch_dict = batch.to_batch_dict() + return self._model_runner._forward_batch(batch_dict) # ------------------------------------------------------------------ # Step 5: process batch result # ------------------------------------------------------------------ def process_batch_result( - self, batch: Dict[str, Any], result: Dict[str, Any] + self, batch: ScheduleBatch, result: Dict[str, Any] ) -> None: """Handle the result returned by the ModelRunnerProcess. - TODO: check completion status (EOS, max_tokens), manage KV-cache, - split finished vs. unfinished requests. + For each request in the result: + 1. Update ``prefix_len`` from the model runner's radix cache hit. + 2. 
Adjust ``_used_tokens`` if a prefix cache hit was found (the + scheduler over-reserved during scheduling). + 3. Append new token(s) to ``req.output_ids``. + 4. Increment ``req.seq_len``. + 5. Call ``req.check_finished()`` (EOS token, max_new_tokens). + 6. If prefill request: mark ``req.is_prefilled = True``, move to + running batch for decode. + 7. If finished: collect for output, free KV-cache budget. """ - finished_requests = result.get("finished", []) - unfinished_requests = result.get("unfinished", []) - - self._finished.extend(finished_requests) - - # Put unfinished requests back for the next iteration - for req in unfinished_requests: - self._waiting_queue.appendleft(req) + # Build a rid -> Req lookup for the batch + rid_to_req: Dict[str, Req] = {req.rid: req for req in batch.reqs} + + # The result may contain per-request outputs in "finished" and + # "unfinished" lists, or a flat "outputs" list. Handle both. + output_items: List[Dict[str, Any]] = [] + output_items.extend(result.get("finished", [])) + output_items.extend(result.get("unfinished", [])) + if "outputs" in result: + output_items.extend(result["outputs"]) + + for out in output_items: + rid = out.get("rid") + req = rid_to_req.get(rid) + if req is None: + logger.warning("Result for unknown rid=%s, skipping", rid) + continue + + # Update prefix_len from model runner's radix cache matching. + # The model runner reports the actual prefix_len it found. + # The scheduler originally reserved full input_len in + # get_next_batch_to_run; correct the over-reservation now. + if "prefix_len" in out and batch.forward_mode.is_extend(): + actual_prefix_len = out["prefix_len"] + if actual_prefix_len > req.prefix_len: + saved = actual_prefix_len - req.prefix_len + req.prefix_len = actual_prefix_len + # Give back the over-reserved tokens. The model runner + # reused cached KV for `saved` tokens, so those tokens + # do not consume new KV pool slots. 
+ self._used_tokens = max(0, self._used_tokens - saved) + logger.info( + "Prefix cache hit for rid=%s: %d tokens reused, " + "budget adjusted by -%d (used=%d/%d)", + rid, + actual_prefix_len, + saved, + self._used_tokens, + self._max_total_tokens, + ) + + # Append generated token(s) + new_token_ids = out.get("output_token_ids", []) + if isinstance(new_token_ids, int): + new_token_ids = [new_token_ids] + req.output_ids.extend(new_token_ids) + req.seq_len += len(new_token_ids) + + # Update token budget for newly generated tokens + self._used_tokens += len(new_token_ids) + + # Check finish conditions + req.check_finished(eos_token_id=self._eos_token_ids[0] if self._eos_token_ids else None) + + # Process batch requests based on forward mode + if batch.forward_mode.is_extend(): + # Prefill batch: mark as prefilled and route + for req in batch.reqs: + req.is_prefilled = True + if req.is_finished: + self._collect_finished_output(req) + self._model_runner._free_rid_resources(req.rid) + self._free_req_resources(req) + else: + self._running_batch.append(req) + + # --- Accumulate prefill metrics --- + total_input = 0 + total_cached = 0 + for req in batch.reqs: + total_input += req.prompt_len + total_cached += req.prefix_len + self._num_prefill_tokens += total_input - total_cached + self._num_prefill_cache_tokens += total_cached + self._num_prefill_reqs += len(batch.reqs) + self._log_prefill_stats() + else: + # Decode batch: check finish and collect + new_running: List[Req] = [] + for req in batch.reqs: + if req.is_finished: + self._collect_finished_output(req) + self._model_runner._free_rid_resources(req.rid) + self._free_req_resources(req) + else: + new_running.append(req) + self._running_batch = new_running + + # --- Accumulate decode metrics --- + self._num_decode_tokens += batch.batch_size # 1 token per request + self._forward_ct_decode += 1 + if ( + self._decode_log_interval > 0 + and self._forward_ct_decode % self._decode_log_interval == 0 + ): + 
self._log_decode_stats() # ------------------------------------------------------------------ # Step 6: stream output to DetokenizerProcess # ------------------------------------------------------------------ def stream_output(self) -> None: - """Send finished token-ID outputs to the DetokenizerProcess.""" + """Send finished/streaming outputs to the DetokenizerProcess. + + Produces :class:`~pymllm.engine.io_struct.BatchTokenIDOutput`-compatible + dicts. For streaming requests, intermediate tokens are also sent. + """ + # Collect streaming outputs from running requests + for req in self._running_batch: + if req.stream and len(req.output_ids) > req.read_offset: + decode_ids = req.output_ids[req.read_offset :] + output = { + "rids": [req.rid], + "finished_reasons": [None], + "decode_ids": decode_ids, + "read_offsets": [req.read_offset], + "output_ids": list(req.output_ids), + "skip_special_tokens": [True], + "prompt_tokens": [req.prompt_len], + "completion_tokens": [len(req.output_ids)], + } + req.read_offset = len(req.output_ids) + self._send_to_detokenizer.send_pyobj(output) + + # Send finished outputs while self._finished: item = self._finished.pop(0) self._send_to_detokenizer.send_pyobj(item) + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _log_prefill_stats(self) -> None: + """Log prefill throughput at INFO level (called after each prefill batch).""" + now = time.time() + elapsed = now - self._last_prefill_stats_tic + self._last_prefill_stats_tic = now + + if elapsed > 0: + input_throughput = self._num_prefill_tokens / elapsed + else: + input_throughput = 0.0 + + logger.info( + "Prefill batch: %d reqs, " + "new tokens: %d, " + "cached tokens: %d, " + "input throughput: %.2f token/s", + self._num_prefill_reqs, + self._num_prefill_tokens, + self._num_prefill_cache_tokens, + input_throughput, + ) + # Reset accumulators + self._num_prefill_tokens 
= 0 + self._num_prefill_cache_tokens = 0 + self._num_prefill_reqs = 0 + + def _log_decode_stats(self) -> None: + """Log decode throughput at INFO level (called every decode_log_interval batches).""" + now = time.time() + elapsed = now - self._last_decode_stats_tic + self._last_decode_stats_tic = now + + if elapsed > 0: + gen_throughput = self._num_decode_tokens / elapsed + else: + gen_throughput = 0.0 + + logger.info( + "Decode: %d steps, " + "gen tokens: %d, " + "running: %d reqs, " + "gen throughput: %.2f token/s", + self._forward_ct_decode, + self._num_decode_tokens, + len(self._running_batch), + gen_throughput, + ) + # Reset accumulators + self._num_decode_tokens = 0 + self._forward_ct_decode = 0 + + def _collect_finished_output(self, req: Req) -> None: + """Build a finished output dict and add it to ``_finished``.""" + decode_ids = req.output_ids[req.read_offset :] + output: Dict[str, Any] = { + "rids": [req.rid], + "finished_reasons": [req.finished_reason], + "decode_ids": decode_ids, + "read_offsets": [req.read_offset], + "output_ids": list(req.output_ids), + "skip_special_tokens": [True], + "prompt_tokens": [req.prompt_len], + "completion_tokens": [len(req.output_ids)], + } + self._finished.append(output) + logger.debug( + "Request %s finished: reason=%s, tokens=%d", + req.rid, + req.finished_reason, + len(req.output_ids), + ) + + def _free_req_resources(self, req: Req) -> None: + """Release KV-cache token budget for a finished request. + + The budget was charged as follows: + - At scheduling: ``+input_len`` (full prompt as conservative estimate) + - After prefix correction: ``-prefix_len`` (cached prefix doesn't need + new KV allocation; model runner manages those via radix cache) + - At each decode step: ``+1`` per generated token + + So the net charge for this request is: + ``(input_len - prefix_len) + num_decode_tokens`` + = ``seq_len - prefix_len`` + + We release exactly that amount. 
+ """ + tokens_to_free = req.seq_len - req.prefix_len + self._used_tokens = max(0, self._used_tokens - tokens_to_free) + req.req_pool_idx = -1 + + def _abort_request(self, rid: str) -> None: + """Abort a request by rid from pending or running queues.""" + # Remove from pending queue + self._pending_queue = [r for r in self._pending_queue if r.rid != rid] + # Abort in running batch + for req in self._running_batch: + if req.rid == rid: + req.abort() + break + # ------------------------------------------------------------------ # Cleanup # ------------------------------------------------------------------ def shutdown(self) -> None: + if self._model_runner is not None: + self._model_runner.shutdown() for sock in ( self._recv_from_tokenizer, - self._send_to_model_runner, - self._recv_from_model_runner, self._send_to_detokenizer, ): if sock is not None: @@ -330,25 +973,38 @@ def shutdown(self) -> None: def run_scheduler_process( recv_from_tokenizer_addr: str, - send_to_model_runner_addr: str, - recv_from_model_runner_addr: str, send_to_detokenizer_addr: str, pipe_writer: Connection, shared_queue: Optional[TensorQueue] = None, enable_shared_queue: bool = False, tensor_transport_mode: TensorTransportMode = "default", + log_level: str = "info", + default_max_new_tokens: int = _DEFAULT_MAX_NEW_TOKENS, + eos_token_ids: Optional[List[int]] = None, + server_config: Optional[Any] = None, + model_config: Optional[Any] = None, + gpu_id: int = 0, ) -> None: - """Entry point for ``torch.multiprocessing.Process(target=...)``.""" + """Entry point for ``torch.multiprocessing.Process(target=...)``. + + The scheduler process now also owns the model runner (sglang-style), + so model initialisation happens here. 
+ """ + setup_subprocess_logging(log_level) proc = SchedulerProcess( recv_from_tokenizer_addr, - send_to_model_runner_addr, - recv_from_model_runner_addr, send_to_detokenizer_addr, + server_config=server_config, + model_config=model_config, + gpu_id=gpu_id, shared_queue=shared_queue, enable_shared_queue=enable_shared_queue, tensor_transport_mode=tensor_transport_mode, + default_max_new_tokens=default_max_new_tokens, + eos_token_ids=eos_token_ids, ) proc.init_sockets() + proc.init_model() pipe_writer.send({"status": "ready", "process": "scheduler"}) pipe_writer.close() diff --git a/pymllm/orchestrator/tokenizer_process.py b/pymllm/orchestrator/tokenizer_process.py index 587a7c1e..703618a4 100644 --- a/pymllm/orchestrator/tokenizer_process.py +++ b/pymllm/orchestrator/tokenizer_process.py @@ -35,7 +35,7 @@ from pymllm.engine.io_struct import TokenizedGenerateReqInput from pymllm.orchestrator.cuda_ipc_transport import MmItemMemoryPool, TensorTransportMode -from pymllm.orchestrator.ipc_utils import create_zmq_socket +from pymllm.orchestrator.ipc_utils import create_zmq_socket, setup_subprocess_logging from pymllm.orchestrator.shared_memory_queue import SharedMemoryManager, TensorQueue logger = logging.getLogger(__name__) @@ -352,6 +352,7 @@ def _tokenize( ) # Accept a list for robustness; take the first element. 
input_text = str(text[0]) if isinstance(text, list) else str(text) + logger.debug(f"Tokenizing input text {input_text}") encode_kwargs: Dict[str, Any] = { "add_special_tokens": True, @@ -485,6 +486,7 @@ def run_tokenizer_process( shared_queue: Optional[TensorQueue] = None, ) -> None: """Entry point for ``torch.multiprocessing.Process(target=...)``.""" + setup_subprocess_logging(tokenizer_cfg.get("log_level", "info")) proc = TokenizerProcess( recv_from_rr_addr, send_to_scheduler_addr, tokenizer_cfg, shared_queue ) diff --git a/pymllm/parsers/__init__.py b/pymllm/parsers/__init__.py new file mode 100644 index 00000000..5ac5c292 --- /dev/null +++ b/pymllm/parsers/__init__.py @@ -0,0 +1,10 @@ +"""Output parsers for reasoning (thinking) content and tool calls.""" + +from pymllm.parsers.reasoning_parser import ReasoningParser +from pymllm.parsers.tool_call_parser import ToolCallParser, ToolCallItem + +__all__ = [ + "ReasoningParser", + "ToolCallParser", + "ToolCallItem", +] diff --git a/pymllm/parsers/reasoning_parser.py b/pymllm/parsers/reasoning_parser.py new file mode 100644 index 00000000..1f73c788 --- /dev/null +++ b/pymllm/parsers/reasoning_parser.py @@ -0,0 +1,212 @@ +"""Reasoning / thinking content parser. + +Separates ``...`` (or model-specific markers) from normal +assistant content. Supports both one-shot and incremental streaming modes. 
+
+Usage::
+
+    # Non-streaming
+    parser = ReasoningParser("qwen3")
+    reasoning, content = parser.parse_non_stream(full_text)
+
+    # Streaming
+    parser = ReasoningParser("qwen3")
+    for delta in deltas:
+        reasoning_delta, content_delta = parser.parse_stream_chunk(delta)
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Dict, Optional, Tuple, Type
+
+
+# ---------------------------------------------------------------------------
+# Detector registry
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class _DetectorConfig:
+    start: str
+    end: str
+    force: bool  # True = always assume reasoning at start
+
+
+_DETECTOR_MAP: Dict[str, _DetectorConfig] = {
+    # DeepSeek-R1: always starts in reasoning mode
+    "deepseek-r1": _DetectorConfig("<think>", "</think>", force=True),
+    # Qwen3: optional thinking (controlled by request)
+    "qwen3": _DetectorConfig("<think>", "</think>", force=False),
+    # Qwen3 forced thinking
+    "qwen3-thinking": _DetectorConfig("<think>", "</think>", force=True),
+    # GLM-4.5
+    "glm45": _DetectorConfig("<think>", "</think>", force=False),
+    # Kimi
+    "kimi": _DetectorConfig("\u25c1think\u25b7", "\u25c1/think\u25b7", force=False),
+}
+
+
+# ---------------------------------------------------------------------------
+# ReasoningParser
+# ---------------------------------------------------------------------------
+
+
+class ReasoningParser:
+    """Model-agnostic reasoning content parser.
+
+    Parameters
+    ----------
+    model_type
+        Key into the detector registry (e.g. ``"qwen3"``, ``"deepseek-r1"``).
+    stream_reasoning
+        If ``True``, stream reasoning content incrementally as it arrives.
+        If ``False``, buffer reasoning until the end tag is found.
+    """
+
+    SUPPORTED = set(_DETECTOR_MAP)
+
+    def __init__(self, model_type: str, stream_reasoning: bool = True):
+        cfg = _DETECTOR_MAP.get(model_type)
+        if cfg is None:
+            raise ValueError(
+                f"Unknown reasoning parser {model_type!r}. 
" + f"Supported: {sorted(_DETECTOR_MAP)}" + ) + self._start = cfg.start + self._end = cfg.end + self._force = cfg.force + self._stream_reasoning = stream_reasoning + + # -- streaming state -- + self._buffer = "" + self._in_reasoning = cfg.force + self._start_consumed = False # True once start tag has been stripped + self._done = False # True once end tag has been seen + + # ------------------------------------------------------------------ # + # Non-streaming + # ------------------------------------------------------------------ # + + def parse_non_stream(self, text: str) -> Tuple[Optional[str], str]: + """Parse complete text. + + Returns ``(reasoning_content, content)`` where either may be empty. + """ + start_idx = text.find(self._start) + end_idx = text.find(self._end) + + if start_idx == -1 and not self._force: + return None, text + + # Determine boundaries + if self._force and start_idx == -1: + # Model didn't emit explicit start tag; treat prefix as reasoning + reason_start = 0 + else: + reason_start = start_idx + len(self._start) + + before = text[:start_idx] if start_idx != -1 else "" + + if end_idx != -1 and end_idx >= reason_start: + reasoning = text[reason_start:end_idx] + after = text[end_idx + len(self._end) :] + else: + reasoning = text[reason_start:] + after = "" + + content = (before + after).strip() + reasoning = reasoning.strip() + return reasoning or None, content + + # ------------------------------------------------------------------ # + # Streaming + # ------------------------------------------------------------------ # + + def parse_stream_chunk(self, delta: str) -> Tuple[str, str]: + """Parse an incremental streaming delta. + + Returns ``(reasoning_delta, content_delta)``. Either may be ``""``. 
+ """ + if not delta: + return "", "" + + if self._done: + return "", delta + + self._buffer += delta + reasoning_out = "" + content_out = "" + + # In forced reasoning mode, consume the start tag if it appears + # (the model may or may not emit it explicitly). + if self._in_reasoning and not self._start_consumed: + idx = self._buffer.find(self._start) + if idx != -1: + # Start tag found — strip it and any text before it + self._buffer = self._buffer[idx + len(self._start) :] + self._start_consumed = True + elif _could_be_partial(self._buffer, self._start): + # Might be a partial start tag — hold the buffer + return "", "" + else: + # No start tag coming — mark consumed and continue + self._start_consumed = True + + if not self._in_reasoning: + # --- look for start tag --- + idx = self._buffer.find(self._start) + if idx != -1: + content_out += self._buffer[:idx] + self._buffer = self._buffer[idx + len(self._start) :] + self._in_reasoning = True + self._start_consumed = True + elif _could_be_partial(self._buffer, self._start): + # Potential partial match at tail — hold the buffer + safe = len(self._buffer) - len(self._start) + 1 + if safe > 0: + content_out += self._buffer[:safe] + self._buffer = self._buffer[safe:] + return "", content_out + else: + content_out += self._buffer + self._buffer = "" + return "", content_out + + if self._in_reasoning: + # --- look for end tag --- + idx = self._buffer.find(self._end) + if idx != -1: + reasoning_out += self._buffer[:idx] + after = self._buffer[idx + len(self._end) :] + self._buffer = "" + self._in_reasoning = False + self._done = True + if after: + content_out += after + elif _could_be_partial(self._buffer, self._end): + safe = len(self._buffer) - len(self._end) + 1 + if safe > 0: + reasoning_out += self._buffer[:safe] + self._buffer = self._buffer[safe:] + else: + reasoning_out += self._buffer + self._buffer = "" + + if not self._stream_reasoning: + reasoning_out = "" + + return reasoning_out, content_out + + +# 
--------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _could_be_partial(text: str, pattern: str) -> bool: + """Return True if *text* ends with a prefix of *pattern*.""" + for i in range(1, len(pattern)): + if text.endswith(pattern[:i]): + return True + return False diff --git a/pymllm/parsers/tool_call_parser.py b/pymllm/parsers/tool_call_parser.py new file mode 100644 index 00000000..fdfe9391 --- /dev/null +++ b/pymllm/parsers/tool_call_parser.py @@ -0,0 +1,433 @@ +"""Tool-call (function-calling) output parser. + +Extracts structured tool calls from model output text. Supports both +one-shot and incremental streaming modes. + +Formats supported: + +* **qwen25** — ``{"name":...,"arguments":...}`` +* **llama3** — ``<|python_tag|>{"name":...,"parameters":...}`` +* **hermes** — ``{"name":...,"arguments":...}`` (same tags, Hermes schema) + +Usage:: + + # Non-streaming + parser = ToolCallParser("qwen25", tools=tools_list) + content, tool_calls = parser.parse_non_stream(full_text) + + # Streaming + parser = ToolCallParser("qwen25", tools=tools_list) + for delta in deltas: + content_delta, tool_call_deltas = parser.parse_stream_chunk(delta) +""" + +from __future__ import annotations + +import json +import re +import uuid +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Tuple + + +# --------------------------------------------------------------------------- +# Data structures +# --------------------------------------------------------------------------- + + +@dataclass +class ToolCallItem: + """A single parsed tool call.""" + + name: Optional[str] = None + arguments: str = "" + tool_call_id: str = "" + index: int = 0 + + def to_openai_dict(self, streaming: bool = True) -> Dict[str, Any]: + """Convert to OpenAI ``tool_calls[]`` element format. 
+
+        Parameters
+        ----------
+        streaming
+            If True, include ``index`` (streaming delta format).
+            If False, omit ``index`` (non-streaming message format).
+        """
+        d: Dict[str, Any] = {"type": "function", "function": {}}
+        if streaming:
+            d["index"] = self.index
+        if self.tool_call_id:
+            d["id"] = self.tool_call_id
+        fn: Dict[str, Any] = d["function"]
+        if self.name is not None:
+            fn["name"] = self.name
+        fn["arguments"] = self.arguments or ""
+        return d
+
+
+# ---------------------------------------------------------------------------
+# Detector base
+# ---------------------------------------------------------------------------
+
+
+@dataclass(frozen=True)
+class _FormatConfig:
+    bot_token: str
+    end_token: str
+    # Regex to extract individual call bodies between bot/end tokens.
+    # If None, the entire text between bot and end tokens is one call.
+    call_regex: Optional[str] = None
+
+
+_FORMAT_MAP: Dict[str, _FormatConfig] = {
+    "qwen25": _FormatConfig(
+        bot_token="<tool_call>\n",
+        end_token="\n</tool_call>",
+    ),
+    "qwen3_coder": _FormatConfig(
+        bot_token="<tool_call>",
+        end_token="</tool_call>",
+    ),
+    "hermes": _FormatConfig(
+        bot_token="<tool_call>\n",
+        end_token="\n</tool_call>",
+    ),
+    "llama3": _FormatConfig(
+        bot_token="<|python_tag|>",
+        end_token="",  # Llama3 uses EOT, detected via EOS
+    ),
+}
+
+
+# ---------------------------------------------------------------------------
+# ToolCallParser
+# ---------------------------------------------------------------------------
+
+
+class ToolCallParser:
+    """Model-agnostic tool-call parser.
+
+    Parameters
+    ----------
+    model_type
+        Key into the format registry (e.g. ``"qwen25"``, ``"llama3"``).
+    tools
+        The ``tools`` list from the OpenAI chat request (used to resolve
+        function names).
+    """
+
+    SUPPORTED = set(_FORMAT_MAP)
+
+    def __init__(self, model_type: str, tools: Optional[List[Any]] = None):
+        cfg = _FORMAT_MAP.get(model_type)
+        if cfg is None:
+            raise ValueError(
+                f"Unknown tool-call parser {model_type!r}. 
" + f"Supported: {sorted(_FORMAT_MAP)}" + ) + self._bot = cfg.bot_token + self._end = cfg.end_token + self._model_type = model_type + self._tools = tools or [] + + # -- streaming state -- + self._buffer = "" + self._in_call = False + self._current_tool_idx = 0 + self._current_call_buf = "" + self._prev_args_len = 0 + self._name_sent = False + self._completed_calls: List[ToolCallItem] = [] + + # ------------------------------------------------------------------ # + # Non-streaming + # ------------------------------------------------------------------ # + + def has_tool_call(self, text: str) -> bool: + """Return True if *text* contains a tool-call pattern.""" + return self._bot in text + + def parse_non_stream( + self, text: str + ) -> Tuple[str, List[ToolCallItem]]: + """Parse complete text. + + Returns ``(remaining_content, tool_calls)``. + """ + if not self.has_tool_call(text): + return text, [] + + tool_calls: List[ToolCallItem] = [] + normal_parts: List[str] = [] + + remaining = text + idx = 0 + while True: + bot_pos = remaining.find(self._bot) + if bot_pos == -1: + normal_parts.append(remaining) + break + normal_parts.append(remaining[:bot_pos]) + remaining = remaining[bot_pos + len(self._bot) :] + + if self._end: + end_pos = remaining.find(self._end) + if end_pos == -1: + call_body = remaining + remaining = "" + else: + call_body = remaining[:end_pos] + remaining = remaining[end_pos + len(self._end) :] + else: + call_body = remaining + remaining = "" + + parsed = self._parse_call_body(call_body.strip()) + if parsed is not None: + parsed.index = idx + parsed.tool_call_id = _make_tool_call_id() + tool_calls.append(parsed) + idx += 1 + + content = "".join(normal_parts).strip() + return content, tool_calls + + # ------------------------------------------------------------------ # + # Streaming + # ------------------------------------------------------------------ # + + def parse_stream_chunk( + self, delta: str + ) -> Tuple[str, List[ToolCallItem]]: + """Parse an 
incremental streaming delta. + + Returns ``(content_delta, tool_call_items)``. + + For tool call items: + - First item for a call: ``name`` is set, ``arguments`` is ``""``. + - Subsequent items: ``name`` is ``None``, ``arguments`` is the new + characters appended (argument delta). + """ + if not delta: + return "", [] + + self._buffer += delta + content_out = "" + items: List[ToolCallItem] = [] + + while True: + if not self._in_call: + # --- look for bot token --- + bot_pos = self._buffer.find(self._bot) + if bot_pos != -1: + content_out += self._buffer[:bot_pos] + self._buffer = self._buffer[bot_pos + len(self._bot) :] + self._in_call = True + self._current_call_buf = "" + self._prev_args_len = 0 + self._name_sent = False + continue # try to process call content + else: + # Check for partial bot token at tail + if self._bot and _could_be_partial(self._buffer, self._bot): + safe = len(self._buffer) - len(self._bot) + 1 + if safe > 0: + content_out += self._buffer[:safe] + self._buffer = self._buffer[safe:] + else: + content_out += self._buffer + self._buffer = "" + break + + if self._in_call: + # --- look for end token --- + if self._end: + end_pos = self._buffer.find(self._end) + if end_pos != -1: + self._current_call_buf += self._buffer[:end_pos] + self._buffer = self._buffer[end_pos + len(self._end) :] + # Emit final tool call + item = self._finalize_call() + if item is not None: + items.append(item) + self._in_call = False + self._current_tool_idx += 1 + continue # there may be more calls + else: + # Accumulate and stream arguments + self._current_call_buf += self._buffer + self._buffer = "" + item = self._stream_partial_call() + if item is not None: + items.append(item) + break + else: + # No end token (e.g. 
Llama3) — accumulate everything
+                    self._current_call_buf += self._buffer
+                    self._buffer = ""
+                    item = self._stream_partial_call()
+                    if item is not None:
+                        items.append(item)
+                    break
+
+        return content_out, items
+
+    def flush(self) -> List[ToolCallItem]:
+        """Flush any remaining buffered tool call (call at request end)."""
+        items: List[ToolCallItem] = []
+        if self._in_call and self._current_call_buf.strip():
+            item = self._finalize_call()
+            if item is not None:
+                items.append(item)
+            self._in_call = False
+        return items
+
+    # ------------------------------------------------------------------ #
+    # Internal helpers
+    # ------------------------------------------------------------------ #
+
+    def _parse_call_body(self, body: str) -> Optional[ToolCallItem]:
+        """Parse a single call body (JSON or qwen3_coder XML-style)."""
+        if self._model_type == "qwen3_coder":
+            return self._parse_qwen3_coder_body(body)
+        try:
+            obj = json.loads(body)
+        except json.JSONDecodeError:
+            return None
+        name = obj.get("name")
+        args = obj.get("arguments") or obj.get("parameters") or {}
+        if isinstance(args, dict):
+            args = json.dumps(args, ensure_ascii=False)
+        return ToolCallItem(name=name, arguments=args)
+
+    @staticmethod
+    def _parse_qwen3_coder_body(body: str) -> Optional[ToolCallItem]:
+        """Parse qwen3_coder XML-style: ``<function=F><parameter=K>V</parameter>...</function>``."""
+        # Extract function name
+        func_m = re.search(r"<function=([^>]+)>", body)
+        if func_m is None:
+            return None
+        name = func_m.group(1)
+        # Extract parameters
+        params: Dict[str, Any] = {}
+        for pm in re.finditer(
+            r"<parameter=([^>]+)>(.*?)(?:</parameter>|(?=<parameter=))",
+            body,
+            re.DOTALL,
+        ):
+            key = pm.group(1)
+            val = pm.group(2).strip()
+            # Try to parse as JSON value, otherwise keep as string
+            try:
+                params[key] = json.loads(val)
+            except (json.JSONDecodeError, ValueError):
+                params[key] = val
+        return ToolCallItem(
+            name=name,
+            arguments=json.dumps(params, ensure_ascii=False),
+        )
+
+    def _stream_partial_call(self) -> Optional[ToolCallItem]:
+        """Try to extract streaming information from the 
partial call."""
+        body = self._current_call_buf.strip()
+        if not body:
+            return None
+
+        # Try to extract name first
+        if not self._name_sent:
+            name = self._try_extract_name(body)
+            if name is not None:
+                self._name_sent = True
+                return ToolCallItem(
+                    name=name,
+                    arguments="",
+                    tool_call_id=_make_tool_call_id(),
+                    index=self._current_tool_idx,
+                )
+            return None
+
+        # Stream argument characters
+        args_str = self._try_extract_args_partial(body)
+        if args_str is not None and len(args_str) > self._prev_args_len:
+            new_chars = args_str[self._prev_args_len :]
+            self._prev_args_len = len(args_str)
+            return ToolCallItem(
+                name=None,
+                arguments=new_chars,
+                index=self._current_tool_idx,
+            )
+        return None
+
+    def _finalize_call(self) -> Optional[ToolCallItem]:
+        """Finalize a complete call — emit any remaining argument chars."""
+        parsed = self._parse_call_body(self._current_call_buf.strip())
+        if parsed is None:
+            return None
+
+        if not self._name_sent:
+            # Entire call came at once
+            parsed.index = self._current_tool_idx
+            parsed.tool_call_id = _make_tool_call_id()
+            return parsed
+
+        # Name was already sent — emit remaining arguments
+        full_args = parsed.arguments
+        new_chars = full_args[self._prev_args_len :]
+        if new_chars:
+            return ToolCallItem(
+                name=None,
+                arguments=new_chars,
+                index=self._current_tool_idx,
+            )
+        return None
+
+    def _try_extract_name(self, partial: str) -> Optional[str]:
+        """Try to extract function name from partial call body."""
+        if self._model_type == "qwen3_coder":
+            m = re.search(r"<function=([^>]+)>", partial)
+            return m.group(1) if m else None
+        m = re.search(r'"name"\s*:\s*"([^"]+)"', partial)
+        return m.group(1) if m else None
+
+    def _try_extract_args_partial(self, partial: str) -> Optional[str]:
+        """Try to extract partial arguments from call body."""
+        if self._model_type == "qwen3_coder":
+            # Build JSON incrementally from <parameter=K>V</parameter> tags
+            params: Dict[str, Any] = {}
+            for pm in re.finditer(
+                r"<parameter=([^>]+)>(.*?)(?:</parameter>)",
+                partial,
+                re.DOTALL,
+            ):
+                key = 
pm.group(1) + val = pm.group(2).strip() + try: + params[key] = json.loads(val) + except (json.JSONDecodeError, ValueError): + params[key] = val + if params: + return json.dumps(params, ensure_ascii=False) + return None + m = re.search(r'"arguments"\s*:\s*(\{.*)', partial, re.DOTALL) + if m: + return m.group(1) + m = re.search(r'"parameters"\s*:\s*(\{.*)', partial, re.DOTALL) + if m: + return m.group(1) + return None + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_tool_call_id() -> str: + return f"call_{uuid.uuid4().hex[:24]}" + + +def _could_be_partial(text: str, pattern: str) -> bool: + for i in range(1, len(pattern)): + if text.endswith(pattern[:i]): + return True + return False diff --git a/pymllm/server/launch.py b/pymllm/server/launch.py index 83a222f7..b9f60322 100644 --- a/pymllm/server/launch.py +++ b/pymllm/server/launch.py @@ -1,17 +1,936 @@ +"""pymllm HTTP server -- RESTful API entry point. + +This module implements a FastAPI-based HTTP server that wraps the pymllm +:class:`Engine` and exposes OpenAI-compatible and native REST endpoints, +following the architecture of sglang's ``http_server.py``. 
+ +Endpoints +--------- +* ``GET /health`` -- liveness probe +* ``GET /v1/models`` -- list served models (OpenAI-compatible) +* ``POST /generate`` -- native generate (streaming via SSE) +* ``POST /v1/completions`` -- OpenAI-compatible completions +* ``POST /v1/chat/completions`` -- OpenAI-compatible chat completions +* ``GET /model_info`` -- model metadata +* ``GET /server_info`` -- runtime config dump +* ``POST /flush_cache`` -- flush internal caches +* ``POST /abort_request`` -- cancel a running request +""" + +import asyncio +import logging +import os +import time +import uuid +from contextlib import asynccontextmanager +from typing import Any, AsyncIterator, Dict, List, Optional, Union + +import orjson +import uvicorn +import uvloop +from fastapi import FastAPI, HTTPException, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import ORJSONResponse, Response, StreamingResponse +from pydantic import BaseModel, Field + +from pymllm.configs.global_config import get_global_config, make_args, read_args from pymllm.engine.launch import Engine -from pymllm.configs.global_config import make_args, read_args + +logger = logging.getLogger(__name__) +asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) + +# --------------------------------------------------------------------------- +# Global handles (populated at startup) +# --------------------------------------------------------------------------- +_engine: Optional[Engine] = None +_tokenizer: Optional[Any] = None + + +def _get_engine() -> Engine: + """Return the running engine or raise.""" + if _engine is None: + raise RuntimeError("Engine not initialised") + return _engine + + +# --------------------------------------------------------------------------- +# Pydantic request / response models +# --------------------------------------------------------------------------- + + +class GenerateRequest(BaseModel): + """Body for ``POST /generate``.""" + + text: Optional[Union[List[str], 
str]] = None + input_ids: Optional[Union[List[List[int]], List[int]]] = None + sampling_params: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None + image_data: Optional[Any] = None + audio_data: Optional[Any] = None + video_data: Optional[Any] = None + return_logprob: Optional[Union[List[bool], bool]] = None + logprob_start_len: Optional[Union[List[int], int]] = None + top_logprobs_num: Optional[Union[List[int], int]] = None + lora_path: Optional[Union[List[Optional[str]], str]] = None + session_params: Optional[Union[List[Dict[str, Any]], Dict[str, Any]]] = None + stream: bool = False + rid: Optional[Union[List[str], str]] = None + + model_config = {"extra": "allow"} # forward unknown keys as extra_options + + +# -- OpenAI-compatible models ----------------------------------------------- + + +class ImageUrl(BaseModel): + url: str + detail: Optional[str] = "auto" + + +class ContentPart(BaseModel): + type: str + text: Optional[str] = None + image_url: Optional[ImageUrl] = None + + +class ChatMessage(BaseModel): + role: str + content: Optional[Union[str, List[ContentPart]]] = None + name: Optional[str] = None + tool_calls: Optional[List[Any]] = None + tool_call_id: Optional[str] = None + + model_config = {"extra": "allow"} + + +class StreamOptions(BaseModel): + include_usage: Optional[bool] = False + continuous_usage_stats: Optional[bool] = False + + +class ToolFunction(BaseModel): + name: str + description: Optional[str] = None + parameters: Optional[Dict[str, Any]] = None + + +class Tool(BaseModel): + type: str = "function" + function: ToolFunction + + +class ChatCompletionRequest(BaseModel): + """OpenAI ``POST /v1/chat/completions`` body.""" + + model: str = "" + messages: List[ChatMessage] + temperature: Optional[float] = None + top_p: Optional[float] = None + top_k: Optional[int] = None + max_tokens: Optional[int] = None + max_completion_tokens: Optional[int] = None + stream: bool = False + stream_options: Optional[StreamOptions] = None + stop: 
Optional[Union[str, List[str]]] = None + n: int = 1 + frequency_penalty: Optional[float] = None + presence_penalty: Optional[float] = None + repetition_penalty: Optional[float] = None + seed: Optional[int] = None + logprobs: Optional[bool] = None + top_logprobs: Optional[int] = None + user: Optional[str] = None + # Tool calling + tools: Optional[List[Tool]] = None + tool_choice: Optional[Union[str, Dict[str, Any]]] = None + # Reasoning control + separate_reasoning: bool = True + stream_reasoning: bool = True + # Pass-through to tokenizer.apply_chat_template (e.g. enable_thinking) + chat_template_kwargs: Optional[Dict[str, Any]] = None + + model_config = {"extra": "allow"} + + +class CompletionRequest(BaseModel): + """OpenAI ``POST /v1/completions`` body.""" + + model: str = "" + prompt: Union[str, List[str]] + temperature: Optional[float] = None + top_p: Optional[float] = None + top_k: Optional[int] = None + max_tokens: Optional[int] = None + stream: bool = False + stream_options: Optional[StreamOptions] = None + stop: Optional[Union[str, List[str]]] = None + n: int = 1 + frequency_penalty: Optional[float] = None + presence_penalty: Optional[float] = None + repetition_penalty: Optional[float] = None + seed: Optional[int] = None + echo: bool = False + logprobs: Optional[int] = None + user: Optional[str] = None + + model_config = {"extra": "allow"} + + +class AbortRequest(BaseModel): + rid: Optional[str] = None + + +# --------------------------------------------------------------------------- +# FastAPI application & lifespan +# --------------------------------------------------------------------------- + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Startup / shutdown hooks for the FastAPI app.""" + global _engine, _tokenizer + _engine = app.state.engine # type: ignore[attr-defined] + + # Load tokenizer in server process for apply_chat_template + cfg = get_global_config() + try: + from transformers import AutoTokenizer + + _tokenizer = 
AutoTokenizer.from_pretrained( + str(cfg.server.tokenizer_path), + trust_remote_code=cfg.server.trust_remote_code, + ) + logger.info( + "Loaded tokenizer for chat template: %s", cfg.server.tokenizer_path + ) + except Exception as e: + logger.warning("Failed to load tokenizer for chat template: %s", e) + + logger.info( + "HTTP server ready at http://%s:%s", + cfg.server.host, + cfg.server.port, + ) + yield + # Shutdown + if _engine is not None: + _engine.shutdown() + _engine = None + + +app = FastAPI(lifespan=lifespan) +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +# --------------------------------------------------------------------------- +# Exception handlers +# --------------------------------------------------------------------------- + + +@app.exception_handler(HTTPException) +async def http_exception_handler(request: Request, exc: HTTPException): + return ORJSONResponse( + content={"error": {"message": exc.detail, "code": exc.status_code}}, + status_code=exc.status_code, + ) + + +# --------------------------------------------------------------------------- +# Health / info endpoints +# --------------------------------------------------------------------------- + + +@app.get("/health") +@app.get("/health_generate") +async def health(): + """Liveness probe.""" + return Response(status_code=200) + + +@app.get("/model_info") +async def model_info(): + """Return basic model metadata.""" + cfg = get_global_config() + hf_cfg = cfg.model.hf_config + return { + "model_path": str(cfg.server.model_path), + "tokenizer_path": str(cfg.server.tokenizer_path), + "served_model_name": cfg.server.served_model_name, + "model_type": getattr(hf_cfg, "model_type", None) if hf_cfg else None, + "architectures": getattr(hf_cfg, "architectures", None) if hf_cfg else None, + } + + +@app.get("/server_info") +async def server_info(): + """Dump runtime server configuration.""" + import 
dataclasses as _dc + + cfg = get_global_config() + return _dc.asdict(cfg.server) + + +@app.get("/v1/models") +async def list_models(): + """OpenAI-compatible model listing.""" + cfg = get_global_config() + model_name = cfg.server.served_model_name or str(cfg.server.model_path) + return { + "object": "list", + "data": [_model_card(model_name)], + } + + +@app.get("/v1/models/{model_id:path}") +async def retrieve_model(model_id: str): + """OpenAI-compatible single model retrieval.""" + cfg = get_global_config() + model_name = cfg.server.served_model_name or str(cfg.server.model_path) + if model_id != model_name: + raise HTTPException( + status_code=404, + detail=f"Model '{model_id}' not found. Available: '{model_name}'", + ) + return _model_card(model_name) + + +def _model_card(model_name: str) -> Dict[str, Any]: + """Build an OpenAI-compatible Model object.""" + return { + "id": model_name, + "object": "model", + "created": int(time.time()), + "owned_by": "pymllm", + } + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + +# Map internal finish reasons to OpenAI-standard values. 
+_FINISH_REASON_MAP = { + "eos": "stop", + "stop": "stop", + "length": "length", + "abort": "stop", +} + + +def _normalize_finish_reason(reason: Optional[str]) -> Optional[str]: + """Convert internal finish reason to OpenAI-compatible value.""" + if reason is None: + return None + return _FINISH_REASON_MAP.get(reason, reason) + + +def _build_sampling_params( + temperature: Optional[float] = None, + top_p: Optional[float] = None, + top_k: Optional[int] = None, + max_tokens: Optional[int] = None, + stop: Optional[Union[str, List[str]]] = None, + frequency_penalty: Optional[float] = None, + presence_penalty: Optional[float] = None, + repetition_penalty: Optional[float] = None, + seed: Optional[int] = None, + **extra: Any, +) -> Dict[str, Any]: + """Build a sampling_params dict from OpenAI-style fields.""" + params: Dict[str, Any] = {} + if temperature is not None: + params["temperature"] = temperature + if top_p is not None: + params["top_p"] = top_p + if top_k is not None: + params["top_k"] = top_k + if max_tokens is not None: + params["max_new_tokens"] = max_tokens + if stop is not None: + params["stop"] = stop if isinstance(stop, list) else [stop] + if frequency_penalty is not None: + params["frequency_penalty"] = frequency_penalty + if presence_penalty is not None: + params["presence_penalty"] = presence_penalty + if repetition_penalty is not None: + params["repetition_penalty"] = repetition_penalty + if seed is not None: + params["seed"] = seed + params.update(extra) + return params + + +def _messages_to_prompt( + messages: List[ChatMessage], + chat_template_kwargs: Optional[Dict[str, Any]] = None, +) -> str: + """Render chat messages into a prompt string via the model's chat template. + + Uses ``tokenizer.apply_chat_template()`` when available (handles Llama, + Qwen, Mistral, etc. automatically). Falls back to ChatML format. + + Parameters + ---------- + chat_template_kwargs + Extra keyword arguments forwarded to ``apply_chat_template`` + (e.g. 
``enable_thinking=True`` for Qwen3). + """ + # Flatten each message into a plain dict for the tokenizer. + msg_dicts: List[Dict[str, Any]] = [] + for msg in messages: + content = msg.content + if isinstance(content, list): + # Multimodal: extract only text parts for the prompt string. + text_parts = [p.text for p in content if p.type == "text" and p.text] + content = "\n".join(text_parts) if text_parts else "" + elif content is None: + content = "" + d: Dict[str, Any] = {"role": msg.role, "content": content} + if msg.name is not None: + d["name"] = msg.name + msg_dicts.append(d) + + tokenizer = _tokenizer + if tokenizer is not None and hasattr(tokenizer, "apply_chat_template"): + try: + extra = dict(chat_template_kwargs) if chat_template_kwargs else {} + return tokenizer.apply_chat_template( + msg_dicts, + tokenize=False, + add_generation_prompt=True, + **extra, + ) + except Exception as e: + logger.warning("apply_chat_template failed, using fallback: %s", e) + + # Fallback: ChatML format (Qwen-style) + parts: List[str] = [] + for m in msg_dicts: + parts.append(f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>") + parts.append("<|im_start|>assistant\n") + return "\n".join(parts) + + +def _extract_image_data(messages: List[ChatMessage]) -> Optional[List[str]]: + """Extract image URLs / base64 strings from multimodal content parts.""" + images: List[str] = [] + for msg in messages: + if not isinstance(msg.content, list): + continue + for part in msg.content: + if part.type == "image_url" and part.image_url is not None: + images.append(part.image_url.url) + return images if images else None + + +def _make_completion_id() -> str: + return f"cmpl-{uuid.uuid4().hex[:24]}" + + +def _make_chat_completion_id() -> str: + return f"chatcmpl-{uuid.uuid4().hex[:24]}" + + +# --------------------------------------------------------------------------- +# Native generate endpoint +# --------------------------------------------------------------------------- + + 
+@app.api_route("/generate", methods=["POST", "PUT"]) +async def generate(obj: GenerateRequest, request: Request): + """Native generation endpoint. Supports SSE streaming.""" + engine = _get_engine() + + # Collect extra fields as extra_options + known = set(GenerateRequest.model_fields.keys()) + extra_options = {k: v for k, v in obj.model_dump().items() if k not in known} + + kwargs: Dict[str, Any] = { + "prompt": obj.text, + "input_ids": obj.input_ids, + "sampling_params": obj.sampling_params, + "image_data": obj.image_data, + "audio_data": obj.audio_data, + "video_data": obj.video_data, + "return_logprob": obj.return_logprob, + "logprob_start_len": obj.logprob_start_len, + "top_logprobs_num": obj.top_logprobs_num, + "lora_path": obj.lora_path, + "session_params": obj.session_params, + "stream": obj.stream, + "rid": obj.rid, + **extra_options, + } + # Strip None values so Engine defaults are used + kwargs = {k: v for k, v in kwargs.items() if v is not None} + + if obj.stream: + + async def _stream() -> AsyncIterator[bytes]: + try: + async for chunk in engine.generate_async(**kwargs): + if await request.is_disconnected(): + break + # Skip empty intermediate chunks (e.g. 
special tokens + # stripped by the detokenizer) + if not chunk.get("delta") and not chunk.get("finished"): + continue + yield b"data: " + orjson.dumps(chunk) + b"\n\n" + except Exception as e: + err = {"error": {"message": str(e)}} + yield b"data: " + orjson.dumps(err) + b"\n\n" + yield b"data: [DONE]\n\n" + + return StreamingResponse(_stream(), media_type="text/event-stream") + + try: + results = [] + async for item in engine.generate_async(**kwargs): + results.append(item) + result = results[0] if len(results) == 1 else results + return ORJSONResponse(result) + except Exception as e: + logger.error("[generate] Error: %s", e) + raise HTTPException(status_code=400, detail=str(e)) + + +# --------------------------------------------------------------------------- +# OpenAI-compatible /v1/completions +# --------------------------------------------------------------------------- + + +@app.post("/v1/completions") +async def openai_completions(obj: CompletionRequest, request: Request): + """OpenAI-compatible text completion endpoint.""" + engine = _get_engine() + sp = _build_sampling_params( + temperature=obj.temperature, + top_p=obj.top_p, + top_k=obj.top_k, + max_tokens=obj.max_tokens, + stop=obj.stop, + frequency_penalty=obj.frequency_penalty, + presence_penalty=obj.presence_penalty, + repetition_penalty=obj.repetition_penalty, + seed=obj.seed, + ) + cfg = get_global_config() + model_name = obj.model or cfg.server.served_model_name or str(cfg.server.model_path) + include_usage = ( + obj.stream_options is not None and obj.stream_options.include_usage + ) + + if obj.stream: + + async def _stream() -> AsyncIterator[bytes]: + comp_id = _make_completion_id() + prompt_tokens = 0 + completion_tokens = 0 + try: + async for chunk in engine.generate_async( + prompt=obj.prompt, sampling_params=sp, stream=True + ): + if await request.is_disconnected(): + break + prompt_tokens = chunk.get("prompt_tokens", prompt_tokens) + completion_tokens = chunk.get("completion_tokens", 
completion_tokens) + delta_text = chunk.get("delta", "") + finish_reason = _normalize_finish_reason( + chunk.get("finished_reason") + ) + # Skip empty intermediate chunks + if not delta_text and finish_reason is None: + continue + sse: Dict[str, Any] = { + "id": comp_id, + "object": "text_completion", + "created": int(time.time()), + "model": model_name, + "choices": [ + { + "index": 0, + "text": delta_text, + "logprobs": None, + "finish_reason": finish_reason, + } + ], + } + yield b"data: " + orjson.dumps(sse) + b"\n\n" + except Exception as e: + err = {"error": {"message": str(e)}} + yield b"data: " + orjson.dumps(err) + b"\n\n" + # Final usage-only chunk (OpenAI stream_options.include_usage) + if include_usage: + usage_chunk: Dict[str, Any] = { + "id": comp_id, + "object": "text_completion", + "created": int(time.time()), + "model": model_name, + "choices": [], + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + }, + } + yield b"data: " + orjson.dumps(usage_chunk) + b"\n\n" + yield b"data: [DONE]\n\n" + + return StreamingResponse(_stream(), media_type="text/event-stream") + + try: + results = [] + async for item in engine.generate_async( + prompt=obj.prompt, sampling_params=sp + ): + results.append(item) + choices = [] + prompt_tokens = 0 + completion_tokens = 0 + for i, r in enumerate(results): + choices.append( + { + "index": i, + "text": r.get("text", ""), + "logprobs": None, + "finish_reason": _normalize_finish_reason( + r.get("finished_reason", "stop") + ), + } + ) + prompt_tokens += r.get("prompt_tokens", 0) + completion_tokens += r.get("completion_tokens", 0) + + return ORJSONResponse( + { + "id": _make_completion_id(), + "object": "text_completion", + "created": int(time.time()), + "model": model_name, + "choices": choices, + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, 
+ }, + } + ) + except Exception as e: + logger.error("[v1/completions] Error: %s", e) + raise HTTPException(status_code=400, detail=str(e)) + + +# --------------------------------------------------------------------------- +# OpenAI-compatible /v1/chat/completions +# --------------------------------------------------------------------------- + + +@app.post("/v1/chat/completions") +async def openai_chat_completions(obj: ChatCompletionRequest, request: Request): + """OpenAI-compatible chat completion endpoint with reasoning & tool-call parsing.""" + engine = _get_engine() + cfg = get_global_config() + # Auto-enable thinking when reasoning_parser is configured and the + # client didn't explicitly set enable_thinking. + chat_kwargs = dict(obj.chat_template_kwargs) if obj.chat_template_kwargs else {} + if cfg.server.reasoning_parser and "enable_thinking" not in chat_kwargs: + chat_kwargs["enable_thinking"] = True + prompt = _messages_to_prompt(obj.messages, chat_template_kwargs=chat_kwargs or None) + image_data = _extract_image_data(obj.messages) + + # max_completion_tokens takes precedence over max_tokens (OpenAI convention) + max_tokens = obj.max_completion_tokens if obj.max_completion_tokens is not None else obj.max_tokens + + sp = _build_sampling_params( + temperature=obj.temperature, + top_p=obj.top_p, + top_k=obj.top_k, + max_tokens=max_tokens, + stop=obj.stop, + frequency_penalty=obj.frequency_penalty, + presence_penalty=obj.presence_penalty, + repetition_penalty=obj.repetition_penalty, + seed=obj.seed, + ) + cfg = get_global_config() + model_name = obj.model or cfg.server.served_model_name or str(cfg.server.model_path) + include_usage = ( + obj.stream_options is not None and obj.stream_options.include_usage + ) + + # Resolve parsers from server config + reasoning_type = cfg.server.reasoning_parser + tool_call_type = cfg.server.tool_call_parser + + gen_kwargs: Dict[str, Any] = { + "prompt": prompt, + "sampling_params": sp, + } + if image_data is not None: + 
gen_kwargs["image_data"] = image_data + + if obj.stream: + + async def _stream() -> AsyncIterator[bytes]: + from pymllm.parsers import ReasoningParser, ToolCallParser + + comp_id = _make_chat_completion_id() + created = int(time.time()) + first = True + prompt_tokens = 0 + completion_tokens = 0 + has_tool_calls = False # track across entire stream + + # Instantiate streaming parsers + r_parser = ( + ReasoningParser(reasoning_type, stream_reasoning=obj.stream_reasoning) + if reasoning_type and obj.separate_reasoning + else None + ) + tc_parser = ( + ToolCallParser(tool_call_type, tools=obj.tools) + if tool_call_type and obj.tools + else None + ) + + def _make_sse(delta: Dict[str, Any], finish: Optional[str] = None) -> bytes: + sse: Dict[str, Any] = { + "id": comp_id, + "object": "chat.completion.chunk", + "created": created, + "model": model_name, + "choices": [ + { + "index": 0, + "delta": delta, + "logprobs": None, + "finish_reason": finish, + } + ], + } + return b"data: " + orjson.dumps(sse) + b"\n\n" + + try: + async for chunk in engine.generate_async(**gen_kwargs, stream=True): + if await request.is_disconnected(): + break + prompt_tokens = chunk.get("prompt_tokens", prompt_tokens) + completion_tokens = chunk.get("completion_tokens", completion_tokens) + + raw_delta = chunk.get("delta", "") + finish_reason = _normalize_finish_reason( + chunk.get("finished_reason") + ) + + # --- Phase 1: reasoning parser --- + reasoning_delta = "" + content_delta = raw_delta + if r_parser and raw_delta: + reasoning_delta, content_delta = r_parser.parse_stream_chunk( + raw_delta + ) + + # --- Phase 2: tool-call parser --- + tool_items: list = [] + if tc_parser and content_delta: + content_delta, tool_items = tc_parser.parse_stream_chunk( + content_delta + ) + + # --- Emit chunks --- + # Role chunk (first) + if first: + yield _make_sse({"role": "assistant"}) + first = False + + # Reasoning content + if reasoning_delta: + yield _make_sse({"reasoning_content": reasoning_delta}) + + 
# Tool call deltas + if tool_items: + has_tool_calls = True + for tc in tool_items: + yield _make_sse({"tool_calls": [tc.to_openai_dict()]}) + + # Normal content + if content_delta: + yield _make_sse({"content": content_delta}) + + # Finish + if finish_reason is not None: + # Flush remaining tool call data + if tc_parser: + remaining = tc_parser.flush() + for tc in remaining: + has_tool_calls = True + yield _make_sse({"tool_calls": [tc.to_openai_dict()]}) + if has_tool_calls: + finish_reason = "tool_calls" + yield _make_sse({}, finish=finish_reason) + + except Exception as e: + err = {"error": {"message": str(e)}} + yield b"data: " + orjson.dumps(err) + b"\n\n" + # Final usage-only chunk + if include_usage: + usage_chunk: Dict[str, Any] = { + "id": comp_id, + "object": "chat.completion.chunk", + "created": created, + "model": model_name, + "choices": [], + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + }, + } + yield b"data: " + orjson.dumps(usage_chunk) + b"\n\n" + yield b"data: [DONE]\n\n" + + return StreamingResponse(_stream(), media_type="text/event-stream") + + # -- Non-streaming -- + try: + from pymllm.parsers import ReasoningParser, ToolCallParser + + r = {} + async for item in engine.generate_async(**gen_kwargs): + r = item + prompt_tokens = r.get("prompt_tokens", 0) + completion_tokens = r.get("completion_tokens", 0) + text = r.get("text", "") + finish_reason = _normalize_finish_reason(r.get("finished_reason", "stop")) + + # Parse reasoning + reasoning_content = None + if reasoning_type and obj.separate_reasoning: + rp = ReasoningParser(reasoning_type) + reasoning_content, text = rp.parse_non_stream(text) + + # Parse tool calls + tool_calls_list = None + if tool_call_type and obj.tools: + tp = ToolCallParser(tool_call_type, tools=obj.tools) + if tp.has_tool_call(text): + text, parsed_calls = tp.parse_non_stream(text) + if parsed_calls: + tool_calls_list = 
[tc.to_openai_dict(streaming=False) for tc in parsed_calls] + finish_reason = "tool_calls" + + message: Dict[str, Any] = {"role": "assistant", "content": text or None} + if reasoning_content: + message["reasoning_content"] = reasoning_content + if tool_calls_list: + message["tool_calls"] = tool_calls_list + + return ORJSONResponse( + { + "id": _make_chat_completion_id(), + "object": "chat.completion", + "created": int(time.time()), + "model": model_name, + "choices": [ + { + "index": 0, + "message": message, + "logprobs": None, + "finish_reason": finish_reason, + } + ], + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + }, + } + ) + except Exception as e: + logger.error("[v1/chat/completions] Error: %s", e) + raise HTTPException(status_code=400, detail=str(e)) + + +# --------------------------------------------------------------------------- +# Administrative endpoints +# --------------------------------------------------------------------------- + + +@app.api_route("/flush_cache", methods=["GET", "POST"]) +async def flush_cache(): + """Placeholder cache flush.""" + return Response(content="Cache flushed.\n", status_code=200) + + +@app.post("/abort_request") +async def abort_request(obj: AbortRequest): + """Abort a running request by rid.""" + engine = _get_engine() + if obj.rid and engine._rr_process is not None: + await engine._rr_process.abort_request(obj.rid) + return Response(status_code=200) + raise HTTPException(status_code=400, detail="Missing or invalid rid") + + +# --------------------------------------------------------------------------- +# Prepare args helper +# --------------------------------------------------------------------------- def _prepare_args(): + """Parse CLI arguments into the global config singleton.""" parser = make_args() read_args(parser=parser) -def main(): +# --------------------------------------------------------------------------- +# 
Server launcher +# --------------------------------------------------------------------------- + + +def launch_server(): + """Launch the pymllm Engine then start the uvicorn HTTP server. + + This function mirrors sglang's ``launch_server``: it first boots all engine + subprocesses (tokenizer, scheduler, model-runner, detokenizer) and then + hands off to uvicorn to serve HTTP traffic. + """ _prepare_args() + cfg = get_global_config() + engine = Engine() engine.launch() + # Attach engine to app.state so the lifespan hook can pick it up. + app.state.engine = engine # type: ignore[attr-defined] + + logger.info( + "Starting HTTP server on %s:%s (root_path=%r)", + cfg.server.host, + cfg.server.port, + cfg.server.fastapi_root_path, + ) + + uvicorn.run( + app, + host=cfg.server.host, + port=cfg.server.port, + root_path=cfg.server.fastapi_root_path, + log_level=cfg.server.log_level, + timeout_keep_alive=5, + loop="uvloop", + ) + + +def main(): + """CLI entry point.""" + launch_server() + if __name__ == "__main__": main()