diff --git a/examples/scripts/code_runner.py b/examples/scripts/code_runner.py index 3870b0fb..7646455b 100644 --- a/examples/scripts/code_runner.py +++ b/examples/scripts/code_runner.py @@ -570,7 +570,9 @@ def run(self) -> None: runtime_dir = os.path.join(self.project_root, "src", "runtime", self.runtime_name, "runtime") runtime_include_dirs.append(runtime_dir) - for kernel in self.kernels: + from concurrent.futures import ThreadPoolExecutor + + def _compile_one_kernel(kernel): logger.info(f"Compiling kernel: {kernel['source']} (func_id={kernel['func_id']})") incore_o = kernel_compiler.compile_incore( kernel["source"], @@ -584,8 +586,10 @@ def run(self) -> None: kernel_bin = incore_o # Complete .so for dlopen else: kernel_bin = extract_text_section(incore_o) # .text only for mmap + return (kernel["func_id"], kernel_bin) - kernel_binaries.append((kernel["func_id"], kernel_bin)) + with ThreadPoolExecutor(max_workers=max(1, len(self.kernels))) as executor: # max_workers must be >= 1, even with no kernels + kernel_binaries = list(executor.map(_compile_one_kernel, self.kernels)) logger.info(f"Compiled {len(kernel_binaries)} kernel(s)") diff --git a/python/kernel_compiler.py b/python/kernel_compiler.py index e9f31a99..f4c78b26 100644 --- a/python/kernel_compiler.py +++ b/python/kernel_compiler.py @@ -148,19 +148,7 @@ def _run_subprocess( label: str, error_hint: str = "Compiler not found" ) -> subprocess.CompletedProcess: - """Run a subprocess command with standardized logging and error handling. 
- - Args: - cmd: Command and arguments - label: Label for log messages (e.g., "Incore", "Orchestration") - error_hint: Message for FileNotFoundError - - Returns: - CompletedProcess on success - - Raises: - RuntimeError: If command fails or executable not found - """ + """Run a subprocess command with standardized logging and error handling.""" try: result = subprocess.run(cmd, capture_output=True, text=True) diff --git a/python/runtime_builder.py b/python/runtime_builder.py index 715bbda5..495ca3ee 100644 --- a/python/runtime_builder.py +++ b/python/runtime_builder.py @@ -1,6 +1,7 @@ import importlib.util import logging from pathlib import Path +from concurrent.futures import ThreadPoolExecutor from runtime_compiler import RuntimeCompiler from kernel_compiler import KernelCompiler @@ -83,26 +84,30 @@ def build(self, name: str) -> tuple: compiler = self._runtime_compiler - # Compile AICore kernel - logger.info("[1/3] Compiling AICore kernel...") + # Prepare configs for all three targets aicore_cfg = build_config["aicore"] aicore_include_dirs = [str((config_dir / p).resolve()) for p in aicore_cfg["include_dirs"]] aicore_source_dirs = [str((config_dir / p).resolve()) for p in aicore_cfg["source_dirs"]] - aicore_binary = compiler.compile("aicore", aicore_include_dirs, aicore_source_dirs) - # Compile AICPU kernel - logger.info("[2/3] Compiling AICPU kernel...") aicpu_cfg = build_config["aicpu"] aicpu_include_dirs = [str((config_dir / p).resolve()) for p in aicpu_cfg["include_dirs"]] aicpu_source_dirs = [str((config_dir / p).resolve()) for p in aicpu_cfg["source_dirs"]] - aicpu_binary = compiler.compile("aicpu", aicpu_include_dirs, aicpu_source_dirs) - # Compile Host runtime - logger.info("[3/3] Compiling Host runtime...") host_cfg = build_config["host"] host_include_dirs = [str((config_dir / p).resolve()) for p in host_cfg["include_dirs"]] host_source_dirs = [str((config_dir / p).resolve()) for p in host_cfg["source_dirs"]] - host_binary = compiler.compile("host", 
host_include_dirs, host_source_dirs) + + # Compile all three targets in parallel + logger.info("Compiling AICore, AICPU, Host in parallel...") + + with ThreadPoolExecutor(max_workers=3) as executor: + fut_aicore = executor.submit(compiler.compile, "aicore", aicore_include_dirs, aicore_source_dirs) + fut_aicpu = executor.submit(compiler.compile, "aicpu", aicpu_include_dirs, aicpu_source_dirs) + fut_host = executor.submit(compiler.compile, "host", host_include_dirs, host_source_dirs) + + aicore_binary = fut_aicore.result() + aicpu_binary = fut_aicpu.result() + host_binary = fut_host.result() logger.info("Build complete!") return (host_binary, aicpu_binary, aicore_binary) diff --git a/python/runtime_compiler.py b/python/runtime_compiler.py index 97c3bea8..dbfecd15 100644 --- a/python/runtime_compiler.py +++ b/python/runtime_compiler.py @@ -6,6 +6,7 @@ from typing import List from toolchain import Toolchain, CCECToolchain, Aarch64GxxToolchain, GxxToolchain import env_manager +import multiprocessing logger = logging.getLogger(__name__) @@ -250,7 +251,7 @@ def _run_compilation( cmake_cmd = ["cmake", cmake_source_dir] + cmake_args self._run_build_step(cmake_cmd, build_dir, platform, "CMake configuration") - make_cmd = ["make", "VERBOSE=1"] + make_cmd = ["make", f"-j{min(multiprocessing.cpu_count(), 32)}", "VERBOSE=1"] self._run_build_step(make_cmd, build_dir, platform, "Make build") # Read the compiled binary