diff --git a/app.cfg.example b/app.cfg.example
index 63905832..62caa332 100644
--- a/app.cfg.example
+++ b/app.cfg.example
@@ -305,19 +305,59 @@ signing =
 [architecturetargets]
-# defines for which architectures the bot will build and what job submission
-# parameters shall be used to allocate a compute node with the correct
-arch_target_map = {
-    "linux/x86_64/generic": "--partition x86-64-generic-node",
-    "linux/x86_64/amd/zen2": "--partition x86-64-amd-zen2-node" }
+# arch_target_map has been replaced by node_type_map
+# arch_target_map = {
+# }
+# Each entry in the node_type_map dictionary describes a build node type. The key is a (descriptive) name for this
+# build node type, and its value is a dictionary containing the following build node properties as key-value pairs:
+#   - os: its operating system (os)
+#   - cpu_subdir: its CPU architecture
+#   - slurm_params: the Slurm parameters that need to be passed to submit jobs to it
+#   - repo_targets: supported repository targets for this node type
+#   - accel (optional): which accelerators this node has
+# All are strings, except repo_targets, which is a list of strings.
+# Note that the Slurm parameters should typically be chosen such that a single type of node (with one specific type of
+# CPU and one specific type of GPU) is allocated.
+# Below is an example configuration for a system that contains 4 types of nodes: zen2 CPU nodes, zen4 CPU nodes,
+# GPU nodes with an icelake CPU and an A100 GPU, and GPU nodes with a zen4 CPU and an H100 GPU.
+# The 'on:' argument to the bot build command determines which node type will be allocated for the build job,
+# e.g. 'bot:build on:arch=zen4,accel=nvidia/cc90 for:...' will match the gpu_h100 node type below.
+# If no 'on:' argument is passed to the build command, the 'for:' argument is used instead,
+# e.g. 'bot:build for:arch=icelake,accel=nvidia/cc80' will match the gpu_a100 node type below.
+node_type_map = {
+    "cpu_zen2": {
+        "os": "linux",
+        "cpu_subdir": "x86_64/amd/zen2",
+        "slurm_params": "-p rome --nodes 1 --ntasks-per-node 16 --cpus-per-task 1",
+        "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"]
+    },
+    "cpu_zen4": {
+        "os": "linux",
+        "cpu_subdir": "x86_64/amd/zen4",
+        "accel": "None",
+        "slurm_params": "-p genoa --nodes 1 --ntasks-per-node 24 --cpus-per-task 1",
+        "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"]
+    },
+    "gpu_a100": {
+        "os": "linux",
+        "cpu_subdir": "x86_64/intel/icelake",
+        "accel": "nvidia/cc80",
+        "slurm_params": "-p gpu_a100 --nodes 1 --tasks-per-node 18 --cpus-per-task 1 --gpus-per-node 1",
+        "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"]
+    },
+    "gpu_h100": {
+        "os": "linux",
+        "cpu_subdir": "x86_64/amd/zen4",
+        "accel": "nvidia/cc90",
+        "slurm_params": "-p gpu_h100 --nodes 1 --tasks-per-node 16 --cpus-per-task 1 --gpus-per-node 1",
+        "repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"]
+    }}

 [repo_targets]
-# defines for which repository a arch_target should be build for
-#
-# EESSI/2023.06 and EESSI/2025.06
-repo_target_map = {
-    "linux/x86_64/amd/zen2" : ["eessi.io-2023.06-software","eessi.io-2025.06-software"] }
+
+# No longer used, repo targets are now specified per node type in the node_type_map
+# repo_target_map = {
+#     "linux/x86_64/amd/zen2" : ["eessi.io-2023.06-software","eessi.io-2025.06-software"] }

 # points to definition of repositories (default repository defined by build container)
 repos_cfg_dir = PATH_TO_SHARED_DIRECTORY/repos
@@ -360,8 +400,12 @@ scontrol_command = /usr/bin/scontrol
 # awaits_release = job id `{job_id}` awaits release by job manager
 awaits_release_delayed_begin_msg = job id `{job_id}` will be eligible to start in about {delay_seconds} seconds
 awaits_release_hold_release_msg = job id `{job_id}` awaits release by job manager
-initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}`
+new_job_instance_repo = New job on instance `{app_name}` for repository `{repo_id}`
+build_on_arch = Building on: `{on_arch}`{on_accelerator}
+build_for_arch = Building for: `{for_arch}`{for_accelerator}
+jobdir = Job dir: `{symlink}`
 with_accelerator =  and accelerator `{accelerator}`
+# initial_comment = New job on instance `{app_name}` for repository `{repo_id}`\nBuilding on: `{on_arch}`{on_accelerator}\nBuilding for: `{for_arch}`{for_accelerator}\nJob dir: `{symlink}`  # no longer used

 [new_job_comments]
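For illustration, the bot reads this setting as plain JSON embedded in an INI section (this mirrors `get_node_types` in `tasks/build.py` further down); a minimal sketch, assuming an `app.cfg` like the example above in the current directory:

    import configparser
    import json

    # Load node_type_map the same way get_node_types() does: the INI value is a JSON dict
    cfg = configparser.ConfigParser()
    cfg.read('app.cfg')
    node_type_map = json.loads(cfg['architecturetargets'].get('node_type_map'))

    for name, props in node_type_map.items():
        # 'accel' is optional, so fall back to a placeholder when it is absent
        accel = props.get('accel', 'none')
        print(f"{name}: {props['os']}/{props['cpu_subdir']} (accel: {accel}) via '{props['slurm_params']}'")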
diff --git a/eessi_bot_event_handler.py b/eessi_bot_event_handler.py
index b1d4123a..a8beff82 100644
--- a/eessi_bot_event_handler.py
+++ b/eessi_bot_event_handler.py
@@ -29,8 +29,8 @@
 # Local application imports (anything from EESSI/eessi-bot-software-layer)
 from connections import github
-from tasks.build import check_build_permission, get_architecture_targets, get_repo_cfg, \
-    request_bot_build_issue_comments, submit_build_jobs
+from tasks.build import check_build_permission, get_node_types, request_bot_build_issue_comments, \
+    submit_build_jobs
 from tasks.deploy import deploy_built_artefacts, determine_job_dirs
 from tasks.clean_up import move_to_trash_bin
 from tools import config
@@ -43,7 +43,7 @@
 REQUIRED_CONFIG = {
     config.SECTION_ARCHITECTURETARGETS: [
-        config.ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP],  # required
+        config.NODE_TYPE_MAP],  # required
     config.SECTION_BOT_CONTROL: [
         # config.BOT_CONTROL_SETTING_CHATLEVEL,  # optional
         config.BOT_CONTROL_SETTING_COMMAND_PERMISSION,  # required
@@ -104,10 +104,12 @@
     config.SECTION_JOB_MANAGER: [
         config.JOB_MANAGER_SETTING_POLL_INTERVAL],  # required
     config.SECTION_REPO_TARGETS: [
-        config.REPO_TARGETS_SETTING_REPO_TARGET_MAP,  # required
         config.REPO_TARGETS_SETTING_REPOS_CFG_DIR],  # required
     config.SECTION_SUBMITTED_JOB_COMMENTS: [
-        config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT,  # required
+        config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO,  # required
+        config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH,  # required
+        config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH,  # required
+        config.SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR,  # required
         # config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE,  # optional
         config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG,  # required
         config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_HOLD_RELEASE_MSG,  # required
@@ -411,23 +413,21 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev
         app_name = self.cfg[config.SECTION_GITHUB][config.GITHUB_SETTING_APP_NAME]
         # TODO check if PR already has a comment with arch targets and
         # repositories
-        arch_map = get_architecture_targets(self.cfg)
-        repo_cfg = get_repo_cfg(self.cfg)
-
-        comment = f"Instance `{app_name}` is configured to build for:"
-        architectures = ['/'.join(arch.split('/')[1:]) for arch in arch_map.keys()]
-        comment += "\n- architectures: "
-        if len(architectures) > 0:
-            comment += f"{', '.join([f'`{arch}`' for arch in architectures])}"
-        else:
-            comment += "none"
-        repositories = list(set([repo_id for repo_ids in repo_cfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP].values()
-                                 for repo_id in repo_ids]))
-        comment += "\n- repositories: "
-        if len(repositories) > 0:
-            comment += f"{', '.join([f'`{repo_id}`' for repo_id in repositories])}"
-        else:
-            comment += "none"
+        node_map = get_node_types(self.cfg)
+
+        comment = f"Instance `{app_name}` is configured to build on:"
+        for node in node_map:
+            comment += f"\n- Node type `{node}`:"
+            current_node_type = node_map[node]
+            if "os" in current_node_type:
+                comment += f"\n  - OS: `{current_node_type['os']}`"
+            if "cpu_subdir" in current_node_type:
+                comment += f"\n  - CPU architecture: `{current_node_type['cpu_subdir']}`"
+            if "repo_targets" in current_node_type:
+                comment += f"\n  - Repositories: `{current_node_type['repo_targets']}`"
+            if "accel" in current_node_type:
+                comment += f"\n  - Accelerators: `{current_node_type['accel']}`"
+            comment += "\n"

         self.log(f"PR opened: comment '{comment}'")
@@ -532,7 +532,7 @@ def handle_bot_command_build(self, event_info, bot_command):
         build_msg = ''
         if check_build_permission(pr, event_info):
             # use filter from command
-            submitted_jobs = submit_build_jobs(pr, event_info, bot_command.action_filters)
+            submitted_jobs = submit_build_jobs(pr, event_info, bot_command.action_filters, bot_command.build_params)
             if submitted_jobs is None or len(submitted_jobs) == 0:
                 build_msg = "\n - no jobs were submitted"
             else:
@@ -578,8 +578,8 @@ def handle_bot_command_status(self, event_info, bot_command):
             bot_command (EESSIBotCommand): command to be handled

         Returns:
-            github.IssueComment.IssueComment (note, github refers to
-            PyGithub, not the github from the internal connections module)
+            (string): list item with a link to the issue comment that was created
+            containing the status overview
         """
         self.log("processing bot command 'status'")
         repo_name = event_info['raw_request_body']['repository']['full_name']
@@ -588,10 +588,12 @@ def handle_bot_command_status(self, event_info, bot_command):
         comment_status = ''
         comment_status += "\nThis is the status of all the `bot: build` commands:"
-        comment_status += "\n|arch|result|date|status|url|"
-        comment_status += "\n|----|------|----|------|---|"
+        comment_status += "\n|on|for|repo|result|date|status|url|"
+        comment_status += "\n|----|----|----|------|----|------|---|"
         for x in range(0, len(status_table['date'])):
-            comment_status += f"\n|{status_table['arch'][x]}|"
+            comment_status += f"\n|{status_table['on arch'][x]}|"
+            comment_status += f"{status_table['for arch'][x]}|"
+            comment_status += f"{status_table['for repo'][x]}|"
             comment_status += f"{status_table['result'][x]}|"
             comment_status += f"{status_table['date'][x]}|"
             comment_status += f"{status_table['status'][x]}|"
@@ -599,7 +601,10 @@ def handle_bot_command_status(self, event_info, bot_command):
         self.log(f"Overview of finished builds: comment '{comment_status}'")
         issue_comment = create_comment(repo_name, pr_number, comment_status, ChatLevels.MINIMAL)
-        return issue_comment
+        if issue_comment:
+            return f"\n - added status comment {issue_comment.html_url}"
+        else:
+            return "\n - failed to create status comment"

     def start(self, app, port=3000):
         """
@@ -692,7 +697,7 @@ def main():
     opts = event_handler_parse()

     # config is read and checked for settings to raise an exception early when the event_handler starts.
-    if config.check_required_cfg_settings(REQUIRED_CONFIG):
+    if config.check_cfg_settings(REQUIRED_CONFIG):
        print("Configuration check: PASSED")
     else:
         print("Configuration check: FAILED")
diff --git a/eessi_bot_job_manager.py b/eessi_bot_job_manager.py
index 4fcf9af3..1efb9d85 100644
--- a/eessi_bot_job_manager.py
+++ b/eessi_bot_job_manager.py
@@ -623,7 +623,7 @@ def main():

     # config is read and checked for settings to raise an exception early when
     # the job_manager runs
-    if config.check_required_cfg_settings(REQUIRED_CONFIG):
+    if config.check_cfg_settings(REQUIRED_CONFIG):
         print("Configuration check: PASSED")
     else:
         print("Configuration check: FAILED")
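For reference, a sketch of the markdown that the reworked status table produces, using the `status_table` layout built by `request_bot_build_issue_comments` further down (the row values here are made up):

    # Sketch: render the new on/for/repo status table; row values are illustrative
    status_table = {
        'on arch': ['`x86_64-amd-zen4`, `nvidia/cc90`'],
        'for arch': ['`x86_64/amd/zen4`, `nvidia/cc90`'],
        'for repo': ['eessi.io-2023.06-software'],
        'result': ['SUCCESS'],
        'date': ['Mar 01 12:00:00 UTC 2025'],
        'status': ['finished'],
        'url': ['https://github.com/org/repo/pull/1#issuecomment-1'],  # made-up URL
    }

    columns = ('on arch', 'for arch', 'for repo', 'result', 'date', 'status', 'url')
    lines = ["|on|for|repo|result|date|status|url|",
             "|----|----|----|------|----|------|---|"]
    for x in range(len(status_table['date'])):
        lines.append("|" + "|".join(status_table[col][x] for col in columns) + "|")
    print("\n".join(lines))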
diff --git a/tasks/build.py b/tasks/build.py
index 836230cd..6148227e 100644
--- a/tasks/build.py
+++ b/tasks/build.py
@@ -23,7 +23,9 @@
 from datetime import datetime, timezone
 import json
 import os
+import re
 import shutil
+import string
 import sys

 # Third party imports (anything installed into the local Python environment)
@@ -33,7 +35,7 @@
 from tools import config, cvmfs_repository, job_metadata, pr_comments, run_cmd
 import tools.filter as tools_filter
 from tools.pr_comments import ChatLevels, create_comment
-
+from tools.build_params import BUILD_PARAM_ARCH, BUILD_PARAM_ACCEL

 # defaults (used if not specified via, eg, 'app.cfg')
 DEFAULT_JOB_TIME_LIMIT = "24:00:00"
@@ -179,26 +181,25 @@ def get_build_env_cfg(cfg):
     return config_data

-def get_architecture_targets(cfg):
-    """
-    Obtain mappings of architecture targets to Slurm parameters
+def get_node_types(cfg):
+    """Obtain mappings of node types to Slurm parameters

     Args:
         cfg (ConfigParser): ConfigParser instance holding full configuration
             (typically read from 'app.cfg')

     Returns:
-        (dict): dictionary mapping architecture targets (format
-            OS/SOFTWARE_SUBDIR) to architecture specific Slurm job submission
-            parameters
+        (dict): dictionary mapping node type names (arbitrary text) to node properties
+            such as the OS, CPU software subdir, supported repositories, accelerator (optional),
+            as well as the Slurm parameters to allocate such a type of node
     """
     fn = sys._getframe().f_code.co_name

-    architecture_targets = cfg[config.SECTION_ARCHITECTURETARGETS]
+    node_types = cfg[config.SECTION_ARCHITECTURETARGETS]

-    arch_target_map = json.loads(architecture_targets.get(config.ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP))
-    log(f"{fn}(): arch target map '{json.dumps(arch_target_map)}'")
-    return arch_target_map
+    node_type_map = json.loads(node_types.get(config.NODE_TYPE_MAP))
+    log(f"{fn}(): node type map '{json.dumps(node_type_map)}'")
+    return node_type_map

 def get_allowed_exportvars(cfg):
@@ -241,8 +242,6 @@ def get_repo_cfg(cfg):
     Returns:
         (dict): dictionary containing repository settings as follows
             - {config.REPO_TARGETS_SETTING_REPOS_CFG_DIR: path to repository config directory as defined in 'app.cfg'}
-            - {config.REPO_TARGETS_SETTING_REPO_TARGET_MAP: json of
-                config.REPO_TARGETS_SETTING_REPO_TARGET_MAP value as defined in 'app.cfg'}
             - for all sections [repo_id] defined in config.REPO_TARGETS_SETTING_REPOS_CFG_DIR/repos.cfg add a mapping
                 {repo_id: dictionary containing settings of that section}
     """
@@ -259,21 +258,6 @@
     settings_repos_cfg_dir = config.REPO_TARGETS_SETTING_REPOS_CFG_DIR
     repo_cfg[settings_repos_cfg_dir] = repo_cfg_org.get(settings_repos_cfg_dir, None)

-    repo_map = {}
-    try:
-        repo_map_str = repo_cfg_org.get(config.REPO_TARGETS_SETTING_REPO_TARGET_MAP)
-        log(f"{fn}(): repo_map '{repo_map_str}'")
-
-        if repo_map_str is not None:
-            repo_map = json.loads(repo_map_str)
-
-        log(f"{fn}(): repo_map '{json.dumps(repo_map)}'")
-    except json.JSONDecodeError as err:
-        print(err)
-        error(f"{fn}(): Value for repo_map ({repo_map_str}) could not be decoded.")
-
-    repo_cfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP] = repo_map
-
     if repo_cfg[config.REPO_TARGETS_SETTING_REPOS_CFG_DIR] is None:
         return repo_cfg

@@ -568,7 +552,7 @@ def prepare_export_vars_file(job_dir, exportvars):
     log(f"{fn}(): created exported variables file {export_vars_path}")

-def prepare_jobs(pr, cfg, event_info, action_filter):
+def prepare_jobs(pr, cfg, event_info, action_filter, build_params):
     """
     Prepare all jobs whose context matches the given filter. Preparation includes
     creating a working directory for a job, downloading the pull request into
@@ -579,6 +563,7 @@
         cfg (ConfigParser): instance holding full configuration (typically read from 'app.cfg')
         event_info (dict): event received by event_handler
         action_filter (EESSIBotActionFilter): used to filter which jobs shall be prepared
+        build_params (EESSIBotBuildParams): dict that contains the build parameters for the job

     Returns:
         (list): list of the prepared jobs
@@ -587,7 +572,7 @@
     app_name = cfg[config.SECTION_GITHUB].get(config.GITHUB_SETTING_APP_NAME)

     build_env_cfg = get_build_env_cfg(cfg)
-    arch_map = get_architecture_targets(cfg)
+    node_map = get_node_types(cfg)
     repocfg = get_repo_cfg(cfg)
     allowed_exportvars = get_allowed_exportvars(cfg)
@@ -627,13 +612,25 @@
         return []

     jobs = []
-    for arch, slurm_opt in arch_map.items():
-        arch_dir = arch.replace('/', '_')
-        # check if repo_target_map contains an entry for {arch}
-        if arch not in repocfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP]:
-            log(f"{fn}(): skipping arch {arch} because repo target map does not define repositories to build for")
+    # Loop over all node types in the node_map and create a context for each node type and repository
+    # configured there. Then, check the action filters against these contexts to find matching ones.
+    # If there is a match, prepare the job dir and create the Job object
+    for node_type_name, partition_info in node_map.items():
+        log(f"{fn}(): node_type_name is {node_type_name}, partition_info is {partition_info}")
+        # Unpack for convenience
+        arch_dir = build_params[BUILD_PARAM_ARCH]
+        if BUILD_PARAM_ACCEL in build_params:
+            arch_dir += f"/{build_params[BUILD_PARAM_ACCEL]}"
+            build_for_accel = build_params[BUILD_PARAM_ACCEL]
+        else:
+            build_for_accel = ''
+        arch_dir = arch_dir.replace('/', '_')
+        # check if repo_targets is defined for this virtual partition
+        if 'repo_targets' not in partition_info:
+            log(f"{fn}(): skipping node type {node_type_name}, "
+                "because no repo_targets were defined for this (virtual) partition")
             continue
-        for repo_id in repocfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP][arch]:
+        for repo_id in partition_info['repo_targets']:
             # ensure repocfg contains information about the repository repo_id if repo_id != EESSI
             # Note, EESSI is a bad/misleading name, it should be more like AS_IN_CONTAINER
             if (repo_id != "EESSI" and repo_id != "EESSI-pilot") and repo_id not in repocfg:
@@ -646,8 +643,16 @@
             # false --> log & continue to next iteration of for loop
             if action_filter:
                 log(f"{fn}(): checking filter {action_filter.to_string()}")
-                context = {"architecture": arch, "repository": repo_id, "instance": app_name}
+                context = {
+                    "architecture": partition_info['cpu_subdir'],
+                    "repository": repo_id,
+                    "instance": app_name
+                }
+                # Optionally add accelerator to the context
+                if 'accel' in partition_info:
+                    context['accelerator'] = partition_info['accel']
                 log(f"{fn}(): context is '{json.dumps(context, indent=4)}'")
+
                 if not action_filter.check_filters(context):
                     log(f"{fn}(): context does NOT satisfy filter(s), skipping")
                     continue
@@ -655,13 +660,7 @@
                 log(f"{fn}(): context DOES satisfy filter(s), going on with job")
             # we reached this point when the filter matched (otherwise we
             # 'continue' with the next repository)
-            # for each match of the filter we create a specific job directory
-            # however, matching CPU architectures works differently to handling
-            # accelerators; multiple CPU architectures defined in arch_target_map
-            # can match the (CPU) architecture component of a filter; in
-            # contrast, the value of the accelerator filter is just passed down
-            # to scripts in bot/ directory of the pull request (see function
-            # prepare_job_cfg and creation of Job tuple below)
+            # We create a specific job directory for the architecture that is going to be built 'for:'
             job_dir = os.path.join(run_dir, arch_dir, repo_id)
             os.makedirs(job_dir, exist_ok=True)
             log(f"{fn}(): job_dir '{job_dir}'")
@@ -673,19 +672,24 @@
             )
             comment_download_pr(base_repo_name, pr, download_pr_exit_code, download_pr_error, error_stage)
             # prepare job configuration file 'job.cfg' in directory <job_dir>/cfg
-            cpu_target = '/'.join(arch.split('/')[1:])
-            os_type = arch.split('/')[0]
-
-            log(f"{fn}(): arch = '{arch}' => cpu_target = '{cpu_target}' , os_type = '{os_type}'"
-                f", accelerator = '{accelerator}'")
-
-            prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, cpu_target, os_type, accelerator)
+            msg = f"{fn}(): node type = '{node_type_name}' => "
+            msg += f"requested cpu_target = '{partition_info['cpu_subdir']}', "
+            msg += f"build cpu_target = '{build_params[BUILD_PARAM_ARCH]}', "
+            msg += f"configured os = '{partition_info['os']}', "
+            if 'accel' in partition_info:
+                msg += f"requested accelerator(s) = '{partition_info['accel']}', "
+            msg += f"build accelerator = '{build_for_accel}'"
+            log(msg)
+
+            prepare_job_cfg(job_dir, build_env_cfg, repocfg, repo_id, build_params[BUILD_PARAM_ARCH],
+                            partition_info['os'], build_for_accel, node_type_name)
             if exportvars:
                 prepare_export_vars_file(job_dir, exportvars)

             # enlist jobs to proceed
-            job = Job(job_dir, arch, repo_id, slurm_opt, year_month, pr_id, accelerator)
+            job = Job(job_dir, partition_info['cpu_subdir'], repo_id, partition_info['slurm_params'], year_month,
+                      pr_id, accelerator)
             jobs.append(job)

     log(f"{fn}(): {len(jobs)} jobs to proceed after applying white list")
@@ -695,7 +699,7 @@
     return jobs

-def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, os_type, accelerator):
+def prepare_job_cfg(job_dir, build_env_cfg, repos_cfg, repo_id, software_subdir, os_type, accelerator, node_type_name):
     """
     Set up job configuration file 'job.cfg' in directory <job_dir>/cfg

@@ -707,6 +711,7 @@
         software_subdir (string): software subdirectory to build for (e.g., 'x86_64/generic')
         os_type (string): type of the os (e.g., 'linux')
         accelerator (string): defines accelerator to build for (e.g., 'nvidia/cc80')
+        node_type_name (string): the node type name, as configured in app.cfg

     Returns:
         None (implicitly)
@@ -777,6 +782,7 @@
     job_cfg_arch_section = job_metadata.JOB_CFG_ARCHITECTURE_SECTION
     job_cfg[job_cfg_arch_section] = {}
+    job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_NODE_TYPE] = node_type_name
     job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR] = software_subdir
     job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_OS_TYPE] = os_type
     job_cfg[job_cfg_arch_section][job_metadata.JOB_CFG_ARCHITECTURE_ACCELERATOR] = accelerator if accelerator else ''
f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}" + on_accelerator_str = '' if job.accelerator: - accelerator_spec = f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]}" - accelerator_spec_str = accelerator_spec.format(accelerator=job.accelerator) + on_accelerator_str = accelerator_spec.format(accelerator=job.accelerator) + + # Set string for accelerator to build for + for_accelerator_str = '' + if BUILD_PARAM_ACCEL in build_params: + for_accelerator_str = accelerator_spec.format(accelerator=build_params[BUILD_PARAM_ACCEL]) # get current date and time dt = datetime.now(timezone.utc) @@ -949,6 +964,10 @@ def create_pr_comment(job, job_id, app_name, pr, symlink): # construct initial job comment buildenv = config.read_config()[config.SECTION_BUILDENV] job_handover_protocol = buildenv.get(config.BUILDENV_SETTING_JOB_HANDOVER_PROTOCOL) + new_job_instance_repo = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO] + build_on_arch = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH] + build_for_arch = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH] + jobdir = submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR] if job_handover_protocol == config.JOB_HANDOVER_PROTOCOL_DELAYED_BEGIN: release_msg_string = config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG release_comment_template = submitted_job_comments_cfg[release_msg_string] @@ -957,34 +976,44 @@ def create_pr_comment(job, job_id, app_name, pr, symlink): poll_interval = int(job_manager_cfg.get(config.JOB_MANAGER_SETTING_POLL_INTERVAL)) delay_factor = float(buildenv.get(config.BUILDENV_SETTING_JOB_DELAY_BEGIN_FACTOR, 2)) eligible_in_seconds = int(poll_interval * delay_factor) - job_comment = (f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT]}" - f"\n|date|job status|comment|\n" + job_comment = (f"{new_job_instance_repo}\n" + f"{build_on_arch}\n" + f"{build_for_arch}\n" + f"{jobdir}\n" + f"|date|job status|comment|\n" f"|----------|----------|------------------------|\n" f"|{dt.strftime('%b %d %X %Z %Y')}|" f"submitted|" f"{release_comment_template}|").format( app_name=app_name, - arch_name=arch_name, + on_arch=on_arch, + for_arch=for_arch, symlink=symlink, repo_id=job.repo_id, job_id=job_id, delay_seconds=eligible_in_seconds, - accelerator_spec=accelerator_spec_str) + on_accelerator=on_accelerator_str, + for_accelerator=for_accelerator_str) else: release_msg_string = config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_HOLD_RELEASE_MSG release_comment_template = submitted_job_comments_cfg[release_msg_string] - job_comment = (f"{submitted_job_comments_cfg[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT]}" - f"\n|date|job status|comment|\n" + job_comment = (f"{new_job_instance_repo}\n" + f"{build_on_arch}\n" + f"{build_for_arch}\n" + f"{jobdir}\n" + f"|date|job status|comment|\n" f"|----------|----------|------------------------|\n" f"|{dt.strftime('%b %d %X %Z %Y')}|" f"submitted|" f"{release_comment_template}|").format( app_name=app_name, - arch_name=arch_name, + on_arch=on_arch, + for_arch=for_arch, symlink=symlink, repo_id=job.repo_id, job_id=job_id, - accelerator_spec=accelerator_spec_str) + on_accelerator=on_accelerator_str, + for_accelerator=for_accelerator_str) # create comment to pull request repo_name = pr.base.repo.full_name @@ -997,7 +1026,7 @@ def create_pr_comment(job, job_id, app_name, pr, 
symlink): return None -def submit_build_jobs(pr, event_info, action_filter): +def submit_build_jobs(pr, event_info, action_filter, build_params): """ Create build jobs for a pull request by preparing jobs which match the given filters, submitting them, adding comments to the pull request on GitHub and @@ -1007,6 +1036,7 @@ def submit_build_jobs(pr, event_info, action_filter): pr (github.PullRequest.PullRequest): instance representing the pull request event_info (dict): event received by event_handler action_filter (EESSIBotActionFilter): used to filter which jobs shall be prepared + build_params (EESSIBotBuildParams): dict that contains the build parameters for the job Returns: (dict): dictionary mapping a job id to a github.IssueComment.IssueComment @@ -1019,7 +1049,7 @@ def submit_build_jobs(pr, event_info, action_filter): app_name = cfg[config.SECTION_GITHUB].get(config.GITHUB_SETTING_APP_NAME) # setup job directories (one per element in product of architecture x repositories) - jobs = prepare_jobs(pr, cfg, event_info, action_filter) + jobs = prepare_jobs(pr, cfg, event_info, action_filter, build_params) # return if there are no jobs to be submitted if not jobs: @@ -1034,7 +1064,7 @@ def submit_build_jobs(pr, event_info, action_filter): job_id, symlink = submit_job(job, cfg) # create pull request comment to report about the submitted job - pr_comment = create_pr_comment(job, job_id, app_name, pr, symlink) + pr_comment = create_pr_comment(job, job_id, app_name, pr, symlink, build_params) job_id_to_comment_map[job_id] = pr_comment pr_comment = pr_comments.PRComment(pr.base.repo.full_name, pr.number, pr_comment.id) @@ -1085,11 +1115,83 @@ def check_build_permission(pr, event_info): return True +def template_to_regex(format_str, with_eol=True): + """ + Converts a formatting string into a regex that can extract all the formatted + parts of the string. If with_eol is True, it assumes the formatted string is followed by an end-of-line + character. This is a requirement if it has to succesfully match a formatting string that ends with a formatting + field. + + Example: if one function creates a formatted string + value = "my_field_value" + format_str = f"This is my string, with a custom field: {my_field}\n" + formatted_string = format_str.format(my_field=value) + Another function can then grab the original value of my_field by doing: + my_re = template_to_regex(format_str) + match_object = re.match(my_re, formatted_string) + match_object['my_field'] then contains "my_field_value" + This is useful when e.g. one function posts a GitHub comment, and another wants to extract information from that + + Args: + format_str (string): a formatting string, with template placeholders. + with_eol (bool, optional): a boolean, indicating if the formatting string is expected to be followed by + an end of line character + + """ + + # string.Formatter returns a 4-tuple of literal text, field name, format spec, and conversion + # E.g if format_str = "This is my {app} it is currently {status}" + # formatter = [ + # ("This is my", "app", "", None), + # ("it is currently", "status", "", None), + # ("", None, None, None), + # ] + formatter = string.Formatter() + regex_parts = [] + + for literal_text, field_name, _, _ in formatter.parse(format_str): + # We use re.escape to escape any special characters in the literal_text, as we want to match those literally + regex_parts.append(re.escape(literal_text)) + if field_name is not None: + # Create a non-greedy, named capture group. 
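To see how the four new templates combine into the initial PR comment body, here is a sketch using the template values from `app.cfg.example` (the field values are illustrative):

    new_job_instance_repo = "New job on instance `{app_name}` for repository `{repo_id}`"
    build_on_arch = "Building on: `{on_arch}`{on_accelerator}"
    build_for_arch = "Building for: `{for_arch}`{for_accelerator}"
    jobdir = "Job dir: `{symlink}`"
    with_accelerator = " and accelerator `{accelerator}`"

    comment = "\n".join([new_job_instance_repo, build_on_arch, build_for_arch, jobdir]).format(
        app_name="mybot",
        repo_id="eessi.io-2023.06-software",
        on_arch="x86_64-amd-zen4",
        on_accelerator=with_accelerator.format(accelerator="nvidia/cc90"),
        for_arch="x86_64/amd/zen4",
        for_accelerator=with_accelerator.format(accelerator="nvidia/cc90"),
        symlink="/data/pr_42/1234",  # illustrative path
    )
    # comment now reads:
    # New job on instance `mybot` for repository `eessi.io-2023.06-software`
    # Building on: `x86_64-amd-zen4` and accelerator `nvidia/cc90`
    # Building for: `x86_64/amd/zen4` and accelerator `nvidia/cc90`
    # Job dir: `/data/pr_42/1234`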
@@ -1085,11 +1115,83 @@
     return True

+
+def template_to_regex(format_str, with_eol=True):
+    """
+    Converts a formatting string into a regex that can extract all the formatted
+    parts of the string. If with_eol is True, it assumes the formatted string is followed by an end-of-line
+    character. This is a requirement if it has to successfully match a formatting string that ends with a
+    formatting field.
+
+    Example: if one function creates a formatted string
+        value = "my_field_value"
+        format_str = "This is my string, with a custom field: {my_field}\n"
+        formatted_string = format_str.format(my_field=value)
+    another function can then grab the original value of my_field by doing:
+        my_re = template_to_regex(format_str)
+        match_object = re.match(my_re, formatted_string)
+    match_object['my_field'] then contains "my_field_value".
+    This is useful when e.g. one function posts a GitHub comment, and another wants to extract information from it.
+
+    Args:
+        format_str (string): a formatting string, with template placeholders
+        with_eol (bool, optional): a boolean, indicating if the formatting string is expected to be followed by
+            an end-of-line character
+
+    Returns:
+        (re.Pattern): compiled regular expression with one named capture group per template placeholder
+    """
+
+    # string.Formatter().parse() returns 4-tuples of literal text, field name, format spec, and conversion
+    # E.g. if format_str = "This is my {app} it is currently {status}", parsing yields
+    # [
+    #     ("This is my ", "app", "", None),
+    #     (" it is currently ", "status", "", None),
+    # ]
+    formatter = string.Formatter()
+    regex_parts = []
+
+    for literal_text, field_name, _, _ in formatter.parse(format_str):
+        # We use re.escape to escape any special characters in the literal_text, as we want to match those literally
+        regex_parts.append(re.escape(literal_text))
+        if field_name is not None:
+            # Create a non-greedy, named capture group. Note that the {field_name} itself is a format specifier,
+            # so we get the actual field name as the name of the capture group.
+            # In other words, if our format_str is "My string with {a_field}" then the named capture group will be
+            # called 'a_field'.
+            # We match any character, but in a non-greedy way. Thus, as soon as it can match the next
+            # literal text section, it will - thus assuming that that's the end of the field.
+            # We use .* to allow for empty fields (such as the optional accelerator fields)
+            regex_parts.append(f"(?P<{field_name}>.*?)")
+
+    # Finally, make sure we append a $ to the regex (if with_eol is set). This is necessary because of our
+    # non-greedy matching strategy. Otherwise, a formatting string that ends with a formatting item would only
+    # match the first letter of the field, because it doesn't find anything to match after (and it is non-greedy).
+    # With the $, it has something to match after the field, thus making sure it matches the whole field.
+    # This does assume that the format_str in the string to be matched is indeed followed by an end-of-line
+    # character, i.e. if a function that creates the formatted string does
+    #     my_string = f"{format_str}\n"
+    # (i.e. has an end-of-line after the format specifier) it can be matched by another function that does
+    #     my_re = template_to_regex(format_str)
+    #     re.match(my_re, my_string)
+    full_pattern = ''.join(regex_parts)
+    if with_eol:
+        full_pattern += "$"
+    return re.compile(full_pattern)
+
+
+class PartialFormatDict(dict):
+    """
+    A dictionary class that allows for missing keys - and will just return {key} in that case.
+    This can be used to partially format some, but not all, placeholders in a formatting string.
+    """
+    def __missing__(self, key):
+        return "{" + key + "}"
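A round-trip sketch of the two helpers above, using the `build_on_arch` and `with_accelerator` templates from `app.cfg.example` (the comment line being matched is illustrative):

    accelerator_fmt = " and accelerator `{accelerator}`"
    on_arch_fmt = "Building on: `{on_arch}`{on_accelerator}"

    # Partially format: substitute on_accelerator, keep on_arch and accelerator as placeholders
    on_arch_fmt_with_accel = on_arch_fmt.format_map(PartialFormatDict(on_accelerator=accelerator_fmt))
    # -> "Building on: `{on_arch}` and accelerator `{accelerator}`"

    comment_line = "Building on: `x86_64-amd-zen4` and accelerator `nvidia/cc90`"
    match = re.match(template_to_regex(on_arch_fmt_with_accel), comment_line)
    print(match.group('on_arch'), match.group('accelerator'))
    # x86_64-amd-zen4 nvidia/cc90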
 def request_bot_build_issue_comments(repo_name, pr_number):
     """
     Query the github API for the issue_comments in a pr.

-    Archs:
+    Args:
         repo_name (string): name of the repository (format USER_OR_ORGANISATION/REPOSITORY)
         pr_number (int): number of the pr

@@ -1099,7 +1201,7 @@
     fn = sys._getframe().f_code.co_name

-    status_table = {'arch': [], 'date': [], 'status': [], 'url': [], 'result': []}
+    status_table = {'on arch': [], 'for arch': [], 'for repo': [], 'date': [], 'status': [], 'url': [], 'result': []}
     cfg = config.read_config()

     # for loop because github has max 100 items per request.
@@ -1114,19 +1216,81 @@
     for comment in comments:
         # iterate through the comments to find the one where the status of the build was in
         submitted_job_comments_section = cfg[config.SECTION_SUBMITTED_JOB_COMMENTS]
-        initial_comment_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT]
-        if initial_comment_fmt[:20] in comment['body']:
-
-            # get archictecture from comment['body']
-            first_line = comment['body'].split('\n')[0]
-            arch_map = get_architecture_targets(cfg)
-            for arch in arch_map.keys():
-                # drop the first element in arch (which names the OS type) and join the remaining items with '-'
-                target_arch = '-'.join(arch.split('/')[1:])
-                if target_arch in first_line:
-                    status_table['arch'].append(target_arch)
+        accelerator_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR]
+        instance_repo_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO]
+        instance_repo_re = template_to_regex(instance_repo_fmt)
+        comment_body = comment['body'].split('\n')
+        instance_repo_match = re.match(instance_repo_re, comment_body[0])
+        # Check if this body starts with an initial comment from the bot (the first item is always the
+        # instance + repo it is building for)
+        # Then, check that it has at least 4 lines, so that we can safely index up to that number
+        if instance_repo_match and len(comment_body) >= 4:
+            log(f"{fn}(): found bot build response in issue, processing...")
+
+            # First, extract the repo_id
+            log(f"{fn}(): found build for repository: {instance_repo_match.group('repo_id')}")
+            status_table['for repo'].append(instance_repo_match.group('repo_id'))
+
+            # Then, try to match the architecture we build on.
+            # First try this including accelerator, to see if one was defined
+            on_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH]
+            on_arch_fmt_with_accel = on_arch_fmt.format_map(PartialFormatDict(on_accelerator=accelerator_fmt))
+            on_arch_re_with_accel = template_to_regex(on_arch_fmt_with_accel)
+            on_arch_match = re.match(on_arch_re_with_accel, comment_body[1])
+            if on_arch_match:
+                # Pattern with accelerator matched, append to status_table
+                log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}, "
+                    f"with accelerator {on_arch_match.group('accelerator')}")
+                status_table['on arch'].append(f"`{on_arch_match.group('on_arch')}`, "
+                                               f"`{on_arch_match.group('accelerator')}`")
+            else:
+                # Pattern with accelerator did not match, retry without accelerator
+                on_arch_re = template_to_regex(on_arch_fmt)
+                on_arch_match = re.match(on_arch_re, comment_body[1])
+                if on_arch_match:
+                    # Pattern without accelerator matched, append to status_table
+                    log(f"{fn}(): found build on architecture: {on_arch_match.group('on_arch')}")
+                    status_table['on arch'].append(f"`{on_arch_match.group('on_arch')}`")
+                else:
+                    # This shouldn't happen: we had an instance_repo_match, but no match for the 'on architecture'
+                    msg = "Could not match regular expression for extracting the architecture to build on.\n"
+                    msg += "String to be matched:\n"
+                    msg += f"{comment_body[1]}\n"
+                    msg += "First regex attempted:\n"
+                    msg += f"{on_arch_re_with_accel.pattern}\n"
+                    msg += "Second regex attempted:\n"
+                    msg += f"{on_arch_re.pattern}\n"
+                    raise ValueError(msg)
+
+            # Now, do the same for the architecture we build for. I.e. first, try to match including accelerator
+            for_arch_fmt = submitted_job_comments_section[config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH]
+            for_arch_fmt_with_accel = for_arch_fmt.format_map(PartialFormatDict(for_accelerator=accelerator_fmt))
+            for_arch_re_with_accel = template_to_regex(for_arch_fmt_with_accel)
+            for_arch_match = re.match(for_arch_re_with_accel, comment_body[2])
+            if for_arch_match:
+                # Pattern with accelerator matched, append to status_table
+                log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}, "
+                    f"with accelerator {for_arch_match.group('accelerator')}")
+                status_table['for arch'].append(f"`{for_arch_match.group('for_arch')}`, "
+                                                f"`{for_arch_match.group('accelerator')}`")
+            else:
+                # Pattern with accelerator did not match, retry without accelerator
+                for_arch_re = template_to_regex(for_arch_fmt)
+                for_arch_match = re.match(for_arch_re, comment_body[2])
+                if for_arch_match:
+                    # Pattern without accelerator matched, append to status_table
+                    log(f"{fn}(): found build for architecture: {for_arch_match.group('for_arch')}")
+                    status_table['for arch'].append(f"`{for_arch_match.group('for_arch')}`")
                 else:
-                    log(f"{fn}(): target_arch '{target_arch}' not found in first line '{first_line}'")
+                    # This shouldn't happen: we had an instance_repo_match, but no match for the 'for architecture'
+                    msg = "Could not match regular expression for extracting the architecture to build for.\n"
+                    msg += "String to be matched:\n"
+                    msg += f"{comment_body[2]}\n"
+                    msg += "First regex attempted:\n"
+                    msg += f"{for_arch_re_with_accel.pattern}\n"
+                    msg += "Second regex attempted:\n"
+                    msg += f"{for_arch_re.pattern}\n"
+                    raise ValueError(msg)

             # get date, status, url and result from the markdown table
             comment_table = comment['body'][comment['body'].find('|'):comment['body'].rfind('|')+1]
diff --git a/tests/test_app.cfg b/tests/test_app.cfg
index 4f833bbf..56c1d6cc 100644
--- a/tests/test_app.cfg
+++ b/tests/test_app.cfg
@@ -21,7 +21,10 @@ job_handover_protocol = hold_release
 awaits_release = job id `{job_id}` awaits release by job manager
 awaits_release_delayed_begin_msg = job id `{job_id}` will be eligible to start in about {delay_seconds} seconds
 awaits_release_hold_release_msg = job id `{job_id}` awaits release by job manager
-initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}`
+new_job_instance_repo = New job on instance `{app_name}` for repository `{repo_id}`
+build_on_arch = Building on: `{on_arch}`{on_accelerator}
+build_for_arch = Building for: `{for_arch}`{for_accelerator}
+jobdir = Job dir: `{symlink}`
 with_accelerator =  and accelerator `{accelerator}`

 [new_job_comments]
diff --git a/tests/test_task_build.py b/tests/test_task_build.py
index 1c289947..af49ac9b 100644
--- a/tests/test_task_build.py
+++ b/tests/test_task_build.py
@@ -29,6 +29,7 @@
 # Local application imports (anything from EESSI/eessi-bot-software-layer)
 from tasks.build import Job, create_pr_comment
 from tools import run_cmd, run_subprocess
+from tools.build_params import EESSIBotBuildParams
 from tools.job_metadata import create_metadata_file, read_metadata_file
 from tools.pr_comments import PRComment, get_submitted_job_comment

@@ -287,6 +288,7 @@ def test_create_pr_comment_succeeds(monkeypatch, mocked_github, tmpdir):
     ym = datetime.today().strftime('%Y.%m')
     pr_number = 1
     job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
+    build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90")
     job_id = "123"

     app_name = "pytest"
@@ -295,7 +297,7 @@
     repo = mocked_github.get_repo(repo_name)
     pr = repo.get_pull(pr_number)
     symlink = "/symlink"
-    comment = create_pr_comment(job, job_id, app_name, pr, symlink)
+    comment = create_pr_comment(job, job_id, app_name, pr, symlink, build_params)
     assert comment.id == 1
     # check if created comment includes jobid?
     print("VERIFYING PR COMMENT")
@@ -317,6 +319,7 @@ def test_create_pr_comment_succeeds_none(monkeypatch, mocked_github, tmpdir):
     ym = datetime.today().strftime('%Y.%m')
     pr_number = 1
     job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
+    build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90")
     job_id = "123"

     app_name = "pytest"
@@ -325,7 +328,7 @@
     repo = mocked_github.get_repo(repo_name)
     pr = repo.get_pull(pr_number)
     symlink = "/symlink"
-    comment = create_pr_comment(job, job_id, app_name, pr, symlink)
+    comment = create_pr_comment(job, job_id, app_name, pr, symlink, build_params)
     assert comment is None

@@ -343,6 +346,7 @@ def test_create_pr_comment_raises_once_then_succeeds(monkeypatch, mocked_github,
     ym = datetime.today().strftime('%Y.%m')
     pr_number = 1
     job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
+    build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90")
     job_id = "123"

     app_name = "pytest"
@@ -351,7 +355,7 @@
     repo = mocked_github.get_repo(repo_name)
     pr = repo.get_pull(pr_number)
     symlink = "/symlink"
-    comment = create_pr_comment(job, job_id, app_name, pr, symlink)
+    comment = create_pr_comment(job, job_id, app_name, pr, symlink, build_params)
     assert comment.id == 1
     assert pr.create_call_count == 2

@@ -369,6 +373,7 @@ def test_create_pr_comment_always_raises(monkeypatch, mocked_github, tmpdir):
     ym = datetime.today().strftime('%Y.%m')
     pr_number = 1
     job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
+    build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90")
     job_id = "123"

     app_name = "pytest"
@@ -378,7 +383,7 @@
     pr = repo.get_pull(pr_number)
     symlink = "/symlink"
     with pytest.raises(Exception) as err:
-        create_pr_comment(job, job_id, app_name, pr, symlink)
+        create_pr_comment(job, job_id, app_name, pr, symlink, build_params)
     assert err.type == CreateIssueCommentException
     assert pr.create_call_count == 3

@@ -396,6 +401,7 @@ def test_create_pr_comment_three_raises(monkeypatch, mocked_github, tmpdir):
     ym = datetime.today().strftime('%Y.%m')
     pr_number = 1
     job = Job(tmpdir, "test/architecture", "EESSI", "--speed-up", ym, pr_number, "fpga/magic")
+    build_params = EESSIBotBuildParams("arch=amd/zen4,accel=nvidia/cc90")
     job_id = "123"

     app_name = "pytest"
@@ -405,7 +411,7 @@
     pr = repo.get_pull(pr_number)
     symlink = "/symlink"
     with pytest.raises(Exception) as err:
-        create_pr_comment(job, job_id, app_name, pr, symlink)
+        create_pr_comment(job, job_id, app_name, pr, symlink, build_params)
     assert err.type == CreateIssueCommentException
     assert pr.create_call_count == 3
diff --git a/tests/test_tools_filter.py b/tests/test_tools_filter.py
index b689aa60..26cd31cd 100644
--- a/tests/test_tools_filter.py
+++ b/tests/test_tools_filter.py
@@ -231,27 +231,52 @@ def test_match_empty_context(complex_filter):
     assert expected == actual

-def test_match_architecture_context(complex_filter):
+# A context lacking keys for components in the filter shouldn't match
+def test_match_sparse_context(complex_filter):
     context = {"architecture": "x86_64/intel/cascadelake"}
-    expected = True
+    expected = False
     actual = complex_filter.check_filters(context)
     assert expected == actual

-def test_match_architecture_job_context(complex_filter):
-    context = {"architecture": "x86_64/intel/cascadelake", "job": 1234}
+def test_matching_context(complex_filter):
+    context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "A"}
     expected = True
     actual = complex_filter.check_filters(context)
     assert expected == actual

-def test_non_match_architecture_repository_context(complex_filter):
-    context = {"architecture": "x86_64/intel/cascadelake", "repository": "EESSI"}
+def test_non_match_architecture_context(complex_filter):
+    context = {"architecture": "x86_64/amd/zen4", "repository": "EESSI", "instance": "mybot", "job": 1234}
+    expected = False
+    actual = complex_filter.check_filters(context)
+    assert expected == actual
+
+
+def test_non_match_repository_context(complex_filter):
+    context = {"architecture": "x86_64/intel/cascadelake", "repository": "EESSI", "instance": "A"}
+    expected = False
+    actual = complex_filter.check_filters(context)
+    assert expected == actual
+
+
+def test_non_match_instance_context(complex_filter):
+    context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "B"}
     expected = False
     actual = complex_filter.check_filters(context)
     assert expected == actual

+# If additional keys are present in the context for which no filter component is defined,
+# it should not prevent a match
+def test_match_additional_context(complex_filter):
+    context = {"architecture": "x86_64/intel/cascadelake", "repository": "nessi.no-2022.A", "instance": "A",
+               "job": 1234}
+    expected = True
+    actual = complex_filter.check_filters(context)
+    assert expected == actual
+
+
 @pytest.fixture
 def arch_filter_slash_syntax():
     af = EESSIBotActionFilter("")
Essentially, this is a dictionary class + but with some additional parsing for the constructor + """ + def __init__(self, build_parameters): + """ + EESSIBotBuildParams constructor + + Args: + build_parameters (string): string containing comma separated build parameters + Example: "arch=amd/zen4,accel=nvidia/cc90" + + Raises: + EESSIBotBuildParamsNameError: raised if parsing an unknown build parameter + string + EESSIBotBuildParamsValueError: raised if an invalid value is passed for a build parameter + """ + build_param_dict = {} + + # Loop over defined build parameters argument + build_params_list = build_parameters.split(',') + for item in build_params_list: + # Separate build parameter name and value + build_param = item.split('=') + if len(build_param) != 2: + msg = f"Expected argument {item} to be split into exactly two parts when splitting by '=', " + msg += f"but the number of items after splitting is {len(build_param)}" + raise EESSIBotBuildParamsValueError(msg) + param_found = False + for full_param_name in BUILD_PARAMS: + # Identify which build param we are matching + if full_param_name.startswith(build_param[0]): + param_found = True + # Store the value of the build parameter by it's full name + build_param_dict[full_param_name] = build_param[1] + if not param_found: + msg = f"Build parameter {build_param[0]} not found. Known build parameters: {BUILD_PARAMS}" + raise EESSIBotBuildParamsNameError(msg) + + super().__init__(build_param_dict) diff --git a/tools/commands.py b/tools/commands.py index b842cc19..3902ab70 100644 --- a/tools/commands.py +++ b/tools/commands.py @@ -18,6 +18,7 @@ # Local application imports (anything from EESSI/eessi-bot-software-layer) from tools.filter import EESSIBotActionFilter, EESSIBotActionFilterError +from tools.build_params import EESSIBotBuildParams def contains_any_bot_command(body): @@ -85,19 +86,76 @@ def __init__(self, cmd_str): """ # TODO add function name to log messages cmd_as_list = cmd_str.split() - self.command = cmd_as_list[0] + self.command = cmd_as_list[0] # E.g. 'build' or 'help' + # TODO always init self.action_filters with empty EESSIBotActionFilter? 
diff --git a/tools/commands.py b/tools/commands.py
index b842cc19..3902ab70 100644
--- a/tools/commands.py
+++ b/tools/commands.py
@@ -18,6 +18,7 @@
 # Local application imports (anything from EESSI/eessi-bot-software-layer)
 from tools.filter import EESSIBotActionFilter, EESSIBotActionFilterError
+from tools.build_params import EESSIBotBuildParams

 def contains_any_bot_command(body):
@@ -85,19 +86,76 @@ def __init__(self, cmd_str):
         """
         # TODO add function name to log messages
         cmd_as_list = cmd_str.split()
-        self.command = cmd_as_list[0]
+        self.command = cmd_as_list[0]  # E.g. 'build' or 'help'
+        # TODO always init self.action_filters with empty EESSIBotActionFilter?
         if len(cmd_as_list) > 1:
-            arg_str = " ".join(cmd_as_list[1:])
-            try:
-                self.action_filters = EESSIBotActionFilter(arg_str)
-            except EESSIBotActionFilterError as err:
-                log(f"ERROR: EESSIBotActionFilterError - {err.args}")
-                self.action_filters = None
-                raise EESSIBotCommandError("invalid action filter")
-            except Exception as err:
-                log(f"Unexpected err={err}, type(err)={type(err)}")
-                raise
+            # Extract arguments for the action filters
+            # By default, everything that follows the 'on:' argument (until the next space) is
+            # considered part of the argument list for the action filters
+            target_args = []
+            other_filter_args = []
+            on_found = False
+            for arg in cmd_as_list[1:]:
+                if arg.startswith('on:'):
+                    on_found = True
+                    # Extract everything after 'on:' and split by comma
+                    filter_content = arg[3:]  # Remove 'on:' prefix
+                    target_args.extend(filter_content.split(','))
+                elif arg.startswith('for:'):
+                    # Anything listed as 'for:' is build parameters
+                    build_params = arg[4:]
+                    # EESSIBotBuildParams is essentially a dict, but parses the input argument
+                    # according to the expected argument format for 'for:'
+                    self.build_params = EESSIBotBuildParams(build_params)
+                else:
+                    # Anything that is not 'on:' or 'for:' should just be passed on as normal
+                    # No further parsing of the value is needed
+                    other_filter_args.extend([arg])
+
+            # If no 'on:' is found in the argument list, everything that follows the 'for:' argument
+            # (until the next space) is considered the argument list for the action filters
+            # Essentially, this represents a native build, i.e. the hardware we build for should be the
+            # hardware we build on
+            if not on_found:
+                for arg in cmd_as_list[1:]:
+                    if arg.startswith('for:'):
+                        # Extract everything after the 'for:' prefix and split by comma
+                        filter_content = arg[4:]
+                        target_args.extend(filter_content.split(','))
+
+            # Join the filter arguments and pass to EESSIBotActionFilter
+            # At this point, target_args is e.g. ["arch=amd/zen2","accel=nvidia/cc90"]
+            # But EESSIBotActionFilter expects e.g. "arch:amd/zen2 accel:nvidia/cc90"
+            # First, normalize to the ["arch:amd/zen2", "accel:nvidia/cc90"] format
+            normalized_filters = []
+            if target_args:
+                for filter_item in target_args:
+                    if '=' in filter_item:
+                        component, pattern = filter_item.split('=', 1)
+                        normalized_filters.append(f"{component}:{pattern}")
+
+            # Add the other filter args to the normalized filters. The other_filter_args are already
+            # colon-separated, so no special parsing is needed there
+            log(f"Extracted filter arguments related to hardware target: {normalized_filters}")
+            log(f"Other extracted filter arguments: {other_filter_args}")
+            normalized_filters += other_filter_args
+
+            # Finally, change into a space-separated string, as expected by EESSIBotActionFilter,
+            # e.g. "arch:amd/zen2 accel:nvidia/cc90 repo:my.repo.io"
+            if normalized_filters:
+                arg_str = " ".join(normalized_filters)
+                try:
+                    log(f"Passing the following arguments to the EESSIBotActionFilter: {arg_str}")
+                    self.action_filters = EESSIBotActionFilter(arg_str)
+                except EESSIBotActionFilterError as err:
+                    log(f"ERROR: EESSIBotActionFilterError - {err.args}")
+                    self.action_filters = None
+                    raise EESSIBotCommandError("invalid action filter")
+                except Exception as err:
+                    log(f"Unexpected err={err}, type(err)={type(err)}")
+                    raise
+        # No arguments were passed to the command
         else:
             self.action_filters = EESSIBotActionFilter("")
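A sketch of how a build command is now split between action filters and build parameters (the command string is illustrative and given without the `bot:` prefix, which is assumed to be stripped before EESSIBotCommand is constructed):

    cmd = EESSIBotCommand("build on:arch=x86_64/intel/icelake,accel=nvidia/cc80 "
                          "for:arch=x86_64/intel/icelake,accel=nvidia/cc80 repo:eessi.io-2023.06-software")
    # 'on:'  -> cmd.action_filters, built from
    #           "arch:x86_64/intel/icelake accel:nvidia/cc80 repo:eessi.io-2023.06-software"
    # 'for:' -> cmd.build_params, an EESSIBotBuildParams dict holding the architecture
    #           and accelerator to build for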
diff --git a/tools/config.py b/tools/config.py
index fc790d56..7f814ea4 100644
--- a/tools/config.py
+++ b/tools/config.py
@@ -30,7 +30,8 @@
 # sectionname_SETTING_settingname for any setting with name settingname in
 # section sectionname
 SECTION_ARCHITECTURETARGETS = 'architecturetargets'
-ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP = 'arch_target_map'
+ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP = 'arch_target_map'  # Obsolete, replaced by NODE_TYPE_MAP
+NODE_TYPE_MAP = 'node_type_map'

 SECTION_BOT_CONTROL = 'bot_control'
 BOT_CONTROL_SETTING_COMMAND_PERMISSION = 'command_permission'
@@ -121,6 +122,10 @@
 SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE = 'awaits_release'
 SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG = 'awaits_release_delayed_begin_msg'
 SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_HOLD_RELEASE_MSG = 'awaits_release_hold_release_msg'
+SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO = 'new_job_instance_repo'
+SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH = 'build_on_arch'
+SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH = 'build_for_arch'
+SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR = 'jobdir'
 SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT = 'initial_comment'
 SUBMITTED_JOB_COMMENTS_SETTING_WITH_ACCELERATOR = 'with_accelerator'
@@ -136,6 +141,33 @@
     JOB_HANDOVER_PROTOCOL_HOLD_RELEASE
 }

+# Allows us to error on config items that were removed
+FORBIDDEN_CONFIG = {
+    SECTION_ARCHITECTURETARGETS: [
+        (
+            ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP,
+            f"Config invalid: '{ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP}' was removed and replaced by "
+            f"'{NODE_TYPE_MAP}'. See app.cfg.example for details."
+        )
+    ],
+    SECTION_REPO_TARGETS: [
+        (
+            REPO_TARGETS_SETTING_REPO_TARGET_MAP,
+            f"Config invalid: '{REPO_TARGETS_SETTING_REPO_TARGET_MAP}' was removed. Repository targets can now be "
+            f"specified within the '{NODE_TYPE_MAP}' dictionary. See app.cfg.example for details."
+        )
+    ],
+    SECTION_SUBMITTED_JOB_COMMENTS: [
+        (
+            SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT,
+            f"Config invalid: '{SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT}' was removed and replaced by "
+            f"'{SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO}', '{SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH}', "
+            f"'{SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH}' and '{SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR}'. "
+            "See app.cfg.example for details."
+        )
+    ]
+}

 def read_config(path='app.cfg'):
     """
@@ -159,10 +191,10 @@
     return config

-def check_required_cfg_settings(req_settings, path="app.cfg"):
+def check_cfg_settings(req_settings, path="app.cfg"):
     """
-    Reads the config file, checks if it contains the required settings,
-    if not logs an error message and exits.
+    Reads the config file, checks that it contains the required settings and that it does not contain
+    forbidden (i.e. removed) settings. If a check fails, logs an error message and exits.

     Args:
         req_settings (dict (str, list)): required settings
@@ -182,4 +214,14 @@
         for item in req_settings[section]:
             if item not in cfg[section]:
                 error(f'Missing configuration item "{item}" in section "{section}" of configuration file {path}.')
+
+    # Check for forbidden settings
+    for section in FORBIDDEN_CONFIG:
+        if section in cfg:
+            for item in FORBIDDEN_CONFIG[section]:
+                # First element of the tuple is the forbidden config item; check if it's in the section
+                if item[0] in cfg[section]:
+                    # Second element contains the specific error message
+                    error(item[1])
+
     return True
diff --git a/tools/filter.py b/tools/filter.py
index 0caa2af8..ddc58352 100644
--- a/tools/filter.py
+++ b/tools/filter.py
@@ -303,4 +303,18 @@ def check_filters(self, context):
                 else:
                     check = False
                     break
+            # Action filter wasn't found in the context, we won't allow this
+            else:
+                check = False
+                break
+
+        # If the context declares an accelerator, enforce that a filter is defined for this component as well
+        # I.e. this enforces that a context with an accelerator will only be used if an accelerator is explicitly
+        # requested in the build command, thus preventing CPU-only builds on GPU nodes (unless explicitly intended)
+        if (
+            FILTER_COMPONENT_ACCEL in context and not
+            any(af.component == FILTER_COMPONENT_ACCEL for af in self.action_filters)
+        ):
+            check = False
+
         return check
diff --git a/tools/job_metadata.py b/tools/job_metadata.py
index 7b7b8d0a..f5ee21ce 100644
--- a/tools/job_metadata.py
+++ b/tools/job_metadata.py
@@ -34,7 +34,9 @@
 JOB_CFG_UPLOAD_STEP = "upload_step"

 # JWD/cfg/$JOB_CFG_FILENAME
+
 JOB_CFG_ARCHITECTURE_SECTION = "architecture"
+JOB_CFG_ARCHITECTURE_NODE_TYPE = "node_type"
 JOB_CFG_ARCHITECTURE_OS_TYPE = "os_type"
 JOB_CFG_ARCHITECTURE_SOFTWARE_SUBDIR = "software_subdir"
 JOB_CFG_ARCHITECTURE_ACCELERATOR = "accelerator"
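The net effect of the new accelerator check in `check_filters`, sketched with the colon-separated filter syntax that `tools/commands.py` feeds into EESSIBotActionFilter (the context values are illustrative):

    from tools.filter import EESSIBotActionFilter

    gpu_node_context = {"architecture": "x86_64/amd/zen4", "repository": "eessi.io-2023.06-software",
                        "instance": "mybot", "accelerator": "nvidia/cc90"}

    # No accelerator filter: a context that declares an accelerator is rejected,
    # preventing accidental CPU-only builds on GPU nodes
    cpu_only = EESSIBotActionFilter("arch:x86_64/amd/zen4")
    print(cpu_only.check_filters(gpu_node_context))    # False

    # Explicitly requesting the accelerator makes the same context match
    with_accel = EESSIBotActionFilter("arch:x86_64/amd/zen4 accel:nvidia/cc90")
    print(with_accel.check_filters(gpu_node_context))  # True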