Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
4415afc
Make a first attempt to change the expected arch_target_map. Make sur…
Apr 17, 2025
d3240e4
Remove old code that was replaced
Apr 17, 2025
dfbcec3
Merge branch 'EESSI:develop' into adapt_arch_target_map
casparvl Jun 30, 2025
b54fdf4
Fix some small issues with non-existing keys
Jun 30, 2025
1e6c3d9
Avoid doing string += None for the arch_dir if accelerator = None. Al…
Jun 30, 2025
e000c91
Make sure that if the context (i.e. app.cfg) defines AN accelerator, …
Jun 30, 2025
40ceefe
Some cleanup
Jun 30, 2025
fad4f47
Remove repo_target_map from config, and all occurrences that import it…
Jul 1, 2025
b4032a6
Fix quotation of keys
Jul 1, 2025
3898df7
Fix flake8 issue
Jul 1, 2025
ef0c430
Unpack the actual arch_target_map by accessing it with a key to get t…
Jul 1, 2025
e3df690
Fix mistake in build path
Jul 8, 2025
424a001
Parse on: and for: options, and pass the correct values on to the com…
Jul 9, 2025
3f00e51
Make sure that the for: arguments are used as build parameters
Jul 10, 2025
2625b30
Change path for job dir so that it represents the 'for' architectures
Jul 10, 2025
ffa2303
More extensive reporting by the bot on what to build for/on
Jul 14, 2025
c98e7e8
This is no longer needed, as it is done with the codecs (decode) now
Jul 14, 2025
5904f11
Print real arch_target_map keys when doing show_config
Jul 14, 2025
7c869f0
Reduce number of possible accelerators per node type to one. Nodes wi…
Jul 14, 2025
179ab0a
Fix app.cfg for the fact that partition_info['accel'] is now a string…
Jul 14, 2025
a9a2585
Make sure that we don't access a dict item that doesn't exist
Jul 14, 2025
51a9c74
Make sure a context match fails if the context doesn't provide e.g. a…
Jul 14, 2025
b12c911
Make old config items invalid, rename to node_type and node_type_map,…
Jul 15, 2025
697dc6e
Update the status command to account for the new on:... for:... syntax
Jul 16, 2025
c0fe051
Remove debugging print statements
Jul 16, 2025
f179b66
Warn about the removal of the repo_target_map
Jul 16, 2025
aad663e
Fix typo
Jul 16, 2025
be8c7d0
Fix hound issues
Jul 16, 2025
7f766f4
Format relevant output of show_config as code
Jul 16, 2025
d205598
Rephrase to make things more clear
Jul 16, 2025
ebcc7fd
Forgot to add this new file...
Jul 16, 2025
0a8bc9b
Fix hound issues
Jul 16, 2025
81257db
Update build params call signature
Jul 16, 2025
f974463
Fix example argument, and argument used to create build parameters in…
Jul 16, 2025
4104796
Forgot to actually git add this file again... anyway, updated the syn…
Jul 16, 2025
0b82386
Update the app.cfg used for the unit tests to account for the changes…
Jul 16, 2025
372a7fe
Update tests for new requirement that all filters have to be present …
Jul 16, 2025
d2be02a
Update tests to accommodate new behaviour of filter checking that …
Jul 16, 2025
6b3a118
Fix hound issues
Jul 16, 2025
3b310f5
Fix flake8 issues
Jul 16, 2025
de0bd1c
Removed some comments that were only there for development, no longer…
Jul 21, 2025
d4ecc7b
Apply suggestions from code review
casparvl Jul 24, 2025
af731e1
Re-comment the awaits_release, as this was done in develop as well. T…
Jul 28, 2025
d48b355
Replace Partition with Node type in show_config output. Also, update …
Jul 28, 2025
6017433
Processed various smaller review comments for tasks/build.py. Elabora…
Jul 28, 2025
279e08f
Apply suggestions from code review
casparvl Jul 29, 2025
80f5f1d
Fix indentation issue
Jul 29, 2025
2f3c0ae
Update tasks/build.py
casparvl Jul 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 55 additions & 11 deletions app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -305,19 +305,59 @@ signing =


[architecturetargets]
# defines for which architectures the bot will build and what job submission
# parameters shall be used to allocate a compute node with the correct
arch_target_map = {
"linux/x86_64/generic": "--partition x86-64-generic-node",
"linux/x86_64/amd/zen2": "--partition x86-64-amd-zen2-node" }
# arch_target_map has been replaced by node_type_map
# arch_target_map = {
# }

# Each entry in the node_type_map dictionary describes a build node type. The key is a (descriptive) name for this build node, and its value is a dictionary containing the following build node properties as key-value pairs:
#   - os: its operating system (os)
#   - cpu_subdir: its CPU architecture
#   - slurm_params: the Slurm parameters that need to be passed to submit jobs to it
#   - repo_targets: supported repository targets for this node type
#   - accel (optional): the accelerator this node type has (at most one per node type)
# All are strings, except repo_targets, which is a list of strings.
# Note that the Slurm parameters should typically be chosen such that a single type of node (with one specific type of
# CPU and one specific type of GPU) is allocated.
# Below is an example configuration for a system that contains 4 types of nodes: zen2 CPU nodes, zen4 CPU nodes,
# GPU nodes with an icelake CPU and A100 GPU, GPU nodes with a zen4 CPU and an H100 GPU.
# The 'on:' argument to the bot build command determines which node type will be allocated for the build job,
# e.g. 'bot:build on:arch=zen4,accel=nvidia/cc90 for:...' will match the gpu_h100 node type below.
# If no 'on:' argument is passed to the build command, the 'for:' argument is used instead,
# e.g. 'bot:build for:arch=icelake,accel=nvidia/cc80' will match the gpu_a100 node type below.
node_type_map = {
"cpu_zen2": {
"os": "linux",
"cpu_subdir": "x86_64/amd/zen2",
"slurm_params": "-p rome --nodes 1 --ntasks-per-node 16 --cpus-per-task 1",
"repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"]
},
"cpu_zen4": {
"os": "linux",
"cpu_subdir": "x86_64/amd/zen4",
"accel": "None",
"slurm_params": "-p genoa --nodes 1 --ntasks-per-node 24 --cpus-per-task 1",
"repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"]
},
"gpu_a100": {
"os": "linux",
"cpu_subdir": "x86_64/intel/icelake",
"accel": "nvidia/cc80",
"slurm_params": "-p gpu_a100 --nodes 1 --tasks-per-node 18 --cpus-per-task 1 --gpus-per-node 1",
"repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"]
},
"gpu_h100": {
"os": "linux",
"cpu_subdir": "x86_64/amd/zen4",
"accel": "nvidia/cc90",
"slurm_params": "-p gpu_h100 --nodes 1 --tasks-per-node 16 --cpus-per-task 1 --gpus-per-node 1",
"repo_targets": ["eessi.io-2023.06-compat","eessi.io-2023.06-software"]
}}

[repo_targets]
# defines for which repositories an arch_target should be built
#
# EESSI/2023.06 and EESSI/2025.06
repo_target_map = {
"linux/x86_64/amd/zen2" : ["eessi.io-2023.06-software","eessi.io-2025.06-software"] }

# No longer used, repo targets are now specified per node type in the node_type_map
# repo_target_map = {
# "linux/x86_64/amd/zen2" : ["eessi.io-2023.06-software","eessi.io-2025.06-software"] }

# points to definition of repositories (default repository defined by build container)
repos_cfg_dir = PATH_TO_SHARED_DIRECTORY/repos
Expand Down Expand Up @@ -360,8 +400,12 @@ scontrol_command = /usr/bin/scontrol
# awaits_release = job id `{job_id}` awaits release by job manager
awaits_release_delayed_begin_msg = job id `{job_id}` will be eligible to start in about {delay_seconds} seconds
awaits_release_hold_release_msg = job id `{job_id}` awaits release by job manager
initial_comment = New job on instance `{app_name}` for CPU micro-architecture `{arch_name}`{accelerator_spec} for repository `{repo_id}` in job dir `{symlink}`
new_job_instance_repo = New job on instance `{app_name}` for repository `{repo_id}`
build_on_arch = Building on: `{on_arch}`{on_accelerator}
build_for_arch = Building for: `{for_arch}`{for_accelerator}
jobdir = Job dir: `{symlink}`
with_accelerator =  and accelerator `{accelerator}`
# initial_comment = New job on instance `{app_name}` for repository `{repo_id}`\nBuilding on: `{on_arch}`{on_accelerator}\nBuilding for: `{for_arch}`{for_accelerator}\nJob dir: `{symlink}` # no longer used


[new_job_comments]
Expand Down
65 changes: 35 additions & 30 deletions eessi_bot_event_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@

# Local application imports (anything from EESSI/eessi-bot-software-layer)
from connections import github
from tasks.build import check_build_permission, get_architecture_targets, get_repo_cfg, \
request_bot_build_issue_comments, submit_build_jobs
from tasks.build import check_build_permission, get_node_types, request_bot_build_issue_comments, \
submit_build_jobs
from tasks.deploy import deploy_built_artefacts, determine_job_dirs
from tasks.clean_up import move_to_trash_bin
from tools import config
Expand All @@ -43,7 +43,7 @@

REQUIRED_CONFIG = {
config.SECTION_ARCHITECTURETARGETS: [
config.ARCHITECTURETARGETS_SETTING_ARCH_TARGET_MAP], # required
config.NODE_TYPE_MAP], # required
config.SECTION_BOT_CONTROL: [
# config.BOT_CONTROL_SETTING_CHATLEVEL, # optional
config.BOT_CONTROL_SETTING_COMMAND_PERMISSION, # required
Expand Down Expand Up @@ -104,10 +104,12 @@
config.SECTION_JOB_MANAGER: [
config.JOB_MANAGER_SETTING_POLL_INTERVAL], # required
config.SECTION_REPO_TARGETS: [
config.REPO_TARGETS_SETTING_REPO_TARGET_MAP, # required
config.REPO_TARGETS_SETTING_REPOS_CFG_DIR], # required
config.SECTION_SUBMITTED_JOB_COMMENTS: [
config.SUBMITTED_JOB_COMMENTS_SETTING_INITIAL_COMMENT, # required
config.SUBMITTED_JOB_COMMENTS_SETTING_INSTANCE_REPO, # required
config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_ON_ARCH, # required
config.SUBMITTED_JOB_COMMENTS_SETTING_BUILD_FOR_ARCH, # required
config.SUBMITTED_JOB_COMMENTS_SETTING_JOBDIR, # required
# config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE, # optional
config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_DELAYED_BEGIN_MSG, # required
config.SUBMITTED_JOB_COMMENTS_SETTING_AWAITS_RELEASE_HOLD_RELEASE_MSG, # required
Expand Down Expand Up @@ -411,23 +413,21 @@ def handle_pull_request_opened_event(self, event_info, pr, req_chatlevel=ChatLev
app_name = self.cfg[config.SECTION_GITHUB][config.GITHUB_SETTING_APP_NAME]
# TODO check if PR already has a comment with arch targets and
# repositories
arch_map = get_architecture_targets(self.cfg)
repo_cfg = get_repo_cfg(self.cfg)

comment = f"Instance `{app_name}` is configured to build for:"
architectures = ['/'.join(arch.split('/')[1:]) for arch in arch_map.keys()]
comment += "\n- architectures: "
if len(architectures) > 0:
comment += f"{', '.join([f'`{arch}`' for arch in architectures])}"
else:
comment += "none"
repositories = list(set([repo_id for repo_ids in repo_cfg[config.REPO_TARGETS_SETTING_REPO_TARGET_MAP].values()
for repo_id in repo_ids]))
comment += "\n- repositories: "
if len(repositories) > 0:
comment += f"{', '.join([f'`{repo_id}`' for repo_id in repositories])}"
else:
comment += "none"
node_map = get_node_types(self.cfg)

comment = f"Instance `{app_name}` is configured to build on:"
for node in node_map:
comment += f"\n- Node type `{node}`:"
current_node_type = node_map[node]
if "os" in current_node_type:
comment += f"\n - OS: `{current_node_type['os']}`"
if "cpu_subdir" in current_node_type:
comment += f"\n - CPU architecture: `{current_node_type['cpu_subdir']}`"
if "repo_targets" in current_node_type:
comment += f"\n - Repositories: `{current_node_type['repo_targets']}`"
if "accel" in current_node_type:
comment += f"\n - Accelerators: `{current_node_type['accel']}`"
comment += "\n"

self.log(f"PR opened: comment '{comment}'")

Expand Down Expand Up @@ -532,7 +532,7 @@ def handle_bot_command_build(self, event_info, bot_command):
build_msg = ''
if check_build_permission(pr, event_info):
# use filter from command
submitted_jobs = submit_build_jobs(pr, event_info, bot_command.action_filters)
submitted_jobs = submit_build_jobs(pr, event_info, bot_command.action_filters, bot_command.build_params)
if submitted_jobs is None or len(submitted_jobs) == 0:
build_msg = "\n - no jobs were submitted"
else:
Expand Down Expand Up @@ -578,8 +578,8 @@ def handle_bot_command_status(self, event_info, bot_command):
bot_command (EESSIBotCommand): command to be handled

Returns:
github.IssueComment.IssueComment (note, github refers to
PyGithub, not the github from the internal connections module)
(string): list item with a link to the issue comment that was created
containing the status overview
"""
self.log("processing bot command 'status'")
repo_name = event_info['raw_request_body']['repository']['full_name']
Expand All @@ -588,18 +588,23 @@ def handle_bot_command_status(self, event_info, bot_command):

comment_status = ''
comment_status += "\nThis is the status of all the `bot: build` commands:"
comment_status += "\n|arch|result|date|status|url|"
comment_status += "\n|----|------|----|------|---|"
comment_status += "\n|on|for|repo|result|date|status|url|"
comment_status += "\n|----|----|----|------|----|------|---|"
for x in range(0, len(status_table['date'])):
comment_status += f"\n|{status_table['arch'][x]}|"
comment_status += f"\n|{status_table['on arch'][x]}|"
comment_status += f"{status_table['for arch'][x]}|"
comment_status += f"{status_table['for repo'][x]}|"
comment_status += f"{status_table['result'][x]}|"
comment_status += f"{status_table['date'][x]}|"
comment_status += f"{status_table['status'][x]}|"
comment_status += f"{status_table['url'][x]}|"

self.log(f"Overview of finished builds: comment '{comment_status}'")
issue_comment = create_comment(repo_name, pr_number, comment_status, ChatLevels.MINIMAL)
return issue_comment
if issue_comment:
return f"\n - added status comment {issue_comment.html_url}"
else:
return "\n - failed to create status comment"

def start(self, app, port=3000):
"""
Expand Down Expand Up @@ -692,7 +697,7 @@ def main():
opts = event_handler_parse()

# config is read and checked for settings to raise an exception early when the event_handler starts.
if config.check_required_cfg_settings(REQUIRED_CONFIG):
if config.check_cfg_settings(REQUIRED_CONFIG):
print("Configuration check: PASSED")
else:
print("Configuration check: FAILED")
Expand Down
2 changes: 1 addition & 1 deletion eessi_bot_job_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ def main():

# config is read and checked for settings to raise an exception early when
# the job_manager runs
if config.check_required_cfg_settings(REQUIRED_CONFIG):
if config.check_cfg_settings(REQUIRED_CONFIG):
print("Configuration check: PASSED")
else:
print("Configuration check: FAILED")
Expand Down
Loading