diff --git a/experiments/gromacs/experiment.py b/experiments/gromacs/experiment.py index 11b0c3a3e..f128ce9ab 100644 --- a/experiments/gromacs/experiment.py +++ b/experiments/gromacs/experiment.py @@ -18,14 +18,14 @@ class Gromacs( ): variant( "workload", - default="water_gmx50_adac", + default="water_gmx50", description="workload name", ) variant( "version", - default="2024", - values=("2024", "2023.3"), + default="2025.2", + values=("2025.2", "2024", "2023.3"), description="app version", ) @@ -35,12 +35,20 @@ # on: turn on, but allow groamcs to disable it if GPU-aware MPI is not supported # force: turn on and force gromacs to use GPU-aware MPI. May result in error if unsupported variant( - "gpu-aware-mpi", + "direct-gpu-comm", default="on", values=("on", "off", "force"), description="Use GPU-aware MPI", ) + variant( + "sycl", + default=True, + values=(True, False), + when=("+rocm"), + description="Enable SYCL support", + ) + def compute_applications_section(self): if self.spec.satisfies("+openmp"): self.set_environment_variable("OMP_PROC_BIND", "close") @@ -79,6 +87,7 @@ def compute_applications_section(self): "pme": "auto", "bonded": f"{bonded_target}", "update": f"{target}", + "additional_args": " -pin {pin} -nb {nb} -pme {pme} -bonded {bonded} -update {update} -maxh {maxh} -nstlist {nstlist} -npme {npme} ", } for k, v in input_variables.items(): @@ -97,13 +106,13 @@ def compute_package_section(self): app_version = self.spec.variants["version"][0] spack_specs = "+mpi~hwloc" - spack_specs += "+sycl" if self.spec.satisfies("+rocm") else "~sycl" + spack_specs += "+sycl" if self.spec.satisfies("+sycl") else "~sycl" if self.spec.satisfies("+cuda") or self.spec.satisfies("+rocm"): - spack_specs += f" gpu-aware-mpi={self.spec.variants['gpu-aware-mpi'][0]} " + spack_specs += f" direct-gpu-comm={self.spec.variants['direct-gpu-comm'][0]} " spack_specs += " ~double " else: - spack_specs += " gpu-aware-mpi=off " + spack_specs += " direct-gpu-comm=off " 
self.add_package_spec( self.name, diff --git a/modifiers/allocation/modifier.py b/modifiers/allocation/modifier.py index f5973788b..71710c838 100644 --- a/modifiers/allocation/modifier.py +++ b/modifiers/allocation/modifier.py @@ -322,6 +322,7 @@ def slurm_instructions(self, v): sbatch_directives = list(f"#SBATCH {x}" for x in (srun_opts + sbatch_opts)) v.mpi_command = f"srun {' '.join(srun_opts)}" + v.single_rank_mpi_command = f"srun -n 1 -N 1 {'--gpus 1' if v.n_gpus else ''}" v.batch_submit = "sbatch {execute_experiment}" v.allocation_directives = "\n".join(sbatch_directives) @@ -390,6 +391,7 @@ def flux_instructions(self, v): batch_directives = list(f"# flux: {x}" for x in (cmd_opts + batch_opts)) v.mpi_command = f"flux run {' '.join([cmd_ranks] + cmd_opts)}" + v.single_rank_mpi_command = f"flux run -n 1 -N 1 {'-g=1' if v.n_gpus else ''}" v.batch_submit = "flux batch {execute_experiment}" v.allocation_directives = "\n".join(batch_directives) @@ -398,6 +400,7 @@ def mpi_instructions(self, v): cmd_opts.extend([f"-n {v.n_ranks}"]) v.mpi_command = "mpirun " + " ".join(cmd_opts) + v.single_rank_mpi_command = "mpirun -n 1" v.batch_submit = "{execute_experiment}" v.allocation_directives = "" @@ -428,6 +431,7 @@ def lsf_instructions(self, v): batch_directives = list(f"#BSUB {x}" for x in batch_opts) v.mpi_command = f"lrun {' '.join(cmd_opts)}" + v.single_rank_mpi_command = f"lrun -n 1 -N 1 {'-g 1' if v.n_gpus else ''}" v.batch_submit = "bsub {execute_experiment}" v.allocation_directives = "\n".join(batch_directives) @@ -444,6 +448,7 @@ def pjm_instructions(self, v): batch_directives = list(f"#PJM {x}" for x in batch_opts) v.mpi_command = "mpiexec " + " ".join(cmd_opts) + v.single_rank_mpi_command = "mpiexec --mpi proc=1" v.batch_submit = "pjsub {execute_experiment}" v.allocation_directives = "\n".join(batch_directives) diff --git a/repo/gromacs/application.py b/repo/gromacs/application.py index 1816883e8..3d44e8624 100644 --- a/repo/gromacs/application.py +++ 
b/repo/gromacs/application.py @@ -4,212 +4,443 @@ # SPDX-License-Identifier: Apache-2.0 import os + from ramble.appkit import * from ramble.expander import Expander class Gromacs(ExecutableApplication): - '''Define a Gromacs application''' - name = 'gromacs' - - maintainers('douglasjacobsen') - - tags('molecular-dynamics') - - define_compiler('gcc9', pkg_spec='gcc@9.3.0', package_manager='spack*') - software_spec('impi2018', pkg_spec='intel-mpi@2018.4.274', package_manager='spack*') - software_spec('gromacs', pkg_spec='gromacs@2023.3', compiler='gcc12', package_manager='spack*') - - executable('pre-process', 'gmx_mpi grompp ' + - '-f {input_path}/{type}.mdp ' + - '-c {input_path}/conf.gro ' + - '-p {input_path}/topol.top ' + - '-o exp_input.tpr', use_mpi=False) - executable('execute-nsteps-gen', 'gmx_mpi mdrun -notunepme ' + - '-v -resethway -noconfout -nsteps 4000 ' + - '-s exp_input.tpr', use_mpi=True) - executable('execute-nsteps', 'gmx_mpi mdrun -notunepme ' + - '-v -resethway -noconfout -nsteps 4000 ' + - '-s {input_path}', use_mpi=True) - executable('execute-adac', 'gmx_mpi mdrun ' + - '-resethway -noconfout -dlb {dlb} -pin {pin} -nb {nb} -pme {pme} -bonded {bonded} -update {update} ' + - '-maxh {maxh} -nsteps {nsteps} -notunepme -nstlist {nstlist} -npme {npme} ' + - '-v -s exp_input.tpr', use_mpi=True) - - input_file('water_gmx50_bare', url='https://ftp.gromacs.org/pub/benchmarks/water_GMX50_bare.tar.gz', - sha256='2219c10acb97787f80f6638132bad3ff2ca1e68600eef1bc8b89d9560e74c66a', - description='') - input_file('water_bare_hbonds', url='https://ftp.gromacs.org/pub/benchmarks/water_bare_hbonds.tar.gz', - sha256='b2e09d30f5c6b00ecf1c13ea6fa715ad132747863ef89f983f6c09a872cf2776', - description='') - input_file('lignocellulose', - url='https://repository.prace-ri.eu/ueabs/GROMACS/1.2/GROMACS_TestCaseB.tar.gz', - sha256='8a12db0232465e1d47c6a4eb89f615cdbbdc8fc360a86088b131331bd462f35c', - description='A model of cellulose and lignocellulosic biomass in an aqueous 
' + - 'solution. This system of 3.3M atoms is inhomogeneous, at ' + - 'least with GROMACS 4.5. This system uses reaction-field' + - 'electrostatics instead of PME and therefore should scale well.') - input_file('HECBioSim', - url='https://github.com/victorusu/GROMACS_Benchmark_Suite/archive/refs/tags/1.0.0.tar.gz', - sha256='9cb2ad61ec2a422fc33578047e7cb2fd2c37ae9a75a6162d662fa2b711e9737f', - description='https://www.hecbiosim.ac.uk/access-hpc/benchmarks') - - input_file('BenchPEP', url='https://www.mpinat.mpg.de/benchPEP.zip', - sha256='f11745201dbb9e6a29a39cb016ee8123f6b0f519b250c94660f0a9623e497b22', - description='12M Atoms, Peptides in Water, 2fs time step, all bonds constrained. https://www.mpinat.mpg.de/grubmueller/bench') - - input_file('BenchPEP_h', url='https://www.mpinat.mpg.de/benchPEP-h.zip', - sha256='3ca8902fd9a6cf005b266f83b57217397b4ba4af987b97dc01e04185bd098bce', - description='12M Atoms, Peptides in Water, 2fs time step, h-bonds constrained. https://www.mpinat.mpg.de/grubmueller/bench') - - input_file('BenchMEM', url='https://www.mpinat.mpg.de/benchMEM.zip', - sha256='3c1c8cd4f274d532f48c4668e1490d389486850d6b3b258dfad4581aa11380a4', - description='82k atoms, protein in membrane surrounded by water, 2 fs time step. https://www.mpinat.mpg.de/grubmueller/bench') - - input_file('BenchRIB', url='https://www.mpinat.mpg.de/benchRIB.zip', - sha256='39acb014a79ed9a9ff2ad6294a2c09f9b85ea6986dfc204a3639814503eeb60a', - description='2 M atoms, ribosome in water, 4 fs time step. 
https://www.mpinat.mpg.de/grubmueller/bench') - - input_file('JCP_benchmarks', - url='https://zenodo.org/record/3893789/files/GROMACS_heterogeneous_parallelization_benchmark_info_and_systems_JCP.tar.gz?download=1', - sha256='82449291f44f4d5b7e5c192d688b57b7c2a2e267fe8b12e7a15b5d68f96c7b20', - description='GROMACS_heterogeneous_parallelization_benchmark_info_and_systems_JCP') - - workload('water_gmx50_adac', executables=['pre-process', 'execute-adac'], - input='water_gmx50_bare') - workload('water_gmx50', executables=['pre-process', 'execute-gen'], - input='water_gmx50_bare') - workload('water_bare', executables=['pre-process', 'execute-gen'], - input='water_bare_hbonds') - workload('lignocellulose', executables=['execute'], - input='lignocellulose') - workload('hecbiosim', executables=['execute'], - input='HECBioSim') - workload('benchpep', executables=['execute'], - input='BenchPEP') - workload('benchpep_h', executables=['execute'], - input='BenchPEP_h') - workload('benchmem', executables=['execute'], - input='BenchMEM') - workload('benchrib', executables=['execute'], - input='BenchRIB') - workload('stmv_rf', executables=['pre-process', 'execute-gen'], - input='JCP_benchmarks') - workload('stmv_pme', executables=['pre-process', 'execute-gen'], - input='JCP_benchmarks') - workload('rnase_cubic', executables=['pre-process', 'execute-gen'], - input='JCP_benchmarks') - workload('ion_channel', executables=['pre-process', 'execute-gen'], - input='JCP_benchmarks') - - workload_variable('dlb', default='no', - description='Dynamic load balancing (with DD): auto, no, yes', - workloads=['water_gmx50_adac']) - workload_variable('pin', default='off', - description='Whether mdrun should try to set thread affinities: auto, on, off', - workloads=['water_gmx50_adac']) - workload_variable('nb', default='auto', - description='Calculate non-bonded interactions on: auto, cpu, gpu', - workloads=['water_gmx50_adac']) - workload_variable('pme', default='auto', - description='Perform PME 
calculations on: auto, cpu, gpu', - workloads=['water_gmx50_adac']) - workload_variable('bonded', default='auto', - description='Perform bonded calculations on: auto, cpu, gpu', - workloads=['water_gmx50_adac']) - workload_variable('update', default='auto', - description='Perform update and constraints on: auto, cpu, gpu', - workloads=['water_gmx50_adac']) - workload_variable('maxh', default='0.05', - description='Terminate after 0.99 times this time (hours)', - workloads=['water_gmx50_adac']) - workload_variable('nsteps', default='-1', - description='Run this number of steps (-1 means infinite, -2 means use mdp option, smaller is invalid)', - workloads=['water_gmx50_adac']) - workload_variable('nstlist', default='200', - description='Set nstlist when using a Verlet buffer tolerance (0 is guess)', - workloads=['water_gmx50_adac']) - workload_variable('npme', default='0', - description='Number of separate ranks to be used for PME, -1 is guess', - workloads=['water_gmx50_adac']) - - workload_variable('size', default='1536', - values=['0000.65', '0000.96', '0001.5', - '0003', '0006', '0012', '0024', - '0048', '0096', '0192', '0384', - '0768', '1536', '3072'], - description='Workload size', - workloads=['water_gmx50', 'water_bare', 'water_gmx50_adac']) - workload_variable('type', default='pme', - description='Workload type.', - values=['pme', 'rf'], - workloads=['water_gmx50', 'water_bare', 'water_gmx50_adac']) - workload_variable('input_path', default='{water_gmx50_bare}/{size}', - description='Input path for water GMX50', - workloads=['water_gmx50', 'water_gmx50_adac']) - workload_variable('input_path', default='{water_bare_hbonds}/{size}', - description='Input path for water bare hbonds', - workload='water_bare') - workload_variable('input_path', default='{lignocellulose}/lignocellulose-rf.tpr', - description='Input path for lignocellulose', - workload='lignocellulose') - workload_variable('type', default='Crambin', - description='Workload type. 
Valid values are ''Crambin'', ''Glutamine-Binding-Protein'', ''hEGFRDimer'', ''hEGFRDimerPair'', ''hEGFRDimerSmallerPL'', ''hEGFRtetramerPair''', - workload='hecbiosim') - workload_variable('input_path', default='{HECBioSim}/HECBioSim/{type}/benchmark.tpr', - description='Input path for hecbiosim', - workload='hecbiosim') - workload_variable('input_path', default='{BenchPEP}/benchPEP.tpr', - description='Input path for Bench PEP workload', - workload='benchpep') - workload_variable('input_path', default='{BenchMEM}/benchMEM.tpr', - description='Input path for Bench MEM workload', - workload='benchmem') - workload_variable('input_path', default='{BenchRIB}/benchRIB.tpr', - description='Input path for Bench RIB workload', - workload='benchrib') - workload_variable('input_path', default='{BenchPEP_h}/benchPEP-h.tpr', - description='Input path for Bench PEP-h workload', - workload='benchpep_h') - workload_variable('type', default='rf_nvt', - description='Workload type for JCP_benchmarks', - workload='stmv_rf') - workload_variable('type', default='pme_nvt', - description='Workload type for JCP_benchmarks', - workload='stmv_pme') - workload_variable('type', default='grompp', - description='Workload type for JCP_benchmarks', - workloads=['ion_channel', 'rnase_cubic']) - workload_variable('input_path', default='{JCP_benchmarks}/stmv', - description='Input path for JCP_benchmark {workload_name}', - workloads=['stmv_rf', 'stmv_pme']) - workload_variable('input_path', default='{JCP_benchmarks}/{workload_name}', - description='Input path for JCP_benchmark {workload_name}', - workloads=['ion_channel', 'rnase_cubic']) - - log_str = os.path.join(Expander.expansion_str('experiment_run_dir'), - 'md.log') - - figure_of_merit('Core Time', log_file=log_str, - fom_regex=r'\s+Time:\s+(?P[0-9]+\.[0-9]+).*', - group_name='core_time', units='s') - - figure_of_merit('Wall Time', log_file=log_str, - fom_regex=r'\s+Time:\s+[0-9]+\.[0-9]+\s+' + - r'(?P[0-9]+\.[0-9]+).*', - 
group_name='wall_time', units='s') - - figure_of_merit('Percent Core Time', log_file=log_str, - fom_regex=r'\s+Time:\s+[0-9]+\.[0-9]+\s+[0-9]+\.[0-9]+\s+' + - r'(?P[0-9]+\.[0-9]+).*', - group_name='perc_core_time', units='%') - - figure_of_merit('Nanosecs per day', log_file=log_str, - fom_regex=r'Performance:\s+' + - r'(?P[0-9]+\.[0-9]+).*', - group_name='ns_per_day', units='ns/day') - - figure_of_merit('Hours per nanosec', log_file=log_str, - fom_regex=r'Performance:\s+[0-9]+\.[0-9]+\s+' + - r'(?P[0-9]+\.[0-9]+).*', - group_name='hours_per_ns', units='hours/ns') + """Define a Gromacs application""" + + name = "gromacs" + + maintainers("douglasjacobsen") + + tags("molecular-dynamics") + + with when("package_manager_family=spack"): + define_compiler("gcc9", pkg_spec="gcc@9.3.0") + + software_spec( + "impi2018", + pkg_spec="intel-mpi@2018.4.274", + ) + + with default_args(compiler="gcc9"): + software_spec( + "spack_gromacs", + pkg_spec="gromacs@2020.5", + ) + + software_spec( + "eessi_gromacs", + pkg_spec="GROMACS/2024.1-foss-2023b", + when=["package_manager_family=eessi"], + ) + + executable( + "pre-process", + "{grompp} " + + "-f {input_path}/{type}.mdp " + + "-c {input_path}/conf.gro " + + "-p {input_path}/topol.top " + + "-o exp_input.tpr", + use_mpi=False, + output_capture=OUTPUT_CAPTURE.ALL, + ) + executable( + "execute-gen", + "{mdrun} {notunepme} -dlb {dlb} " + + "{verbose} -resetstep {resetstep} -noconfout -nsteps {nsteps} " + + "-s exp_input.tpr {additional_args}", + use_mpi=True, + output_capture=OUTPUT_CAPTURE.ALL, + ) + executable( + "execute", + "{mdrun} {notunepme} -dlb {dlb} " + + "{verbose} -resetstep {resetstep} -noconfout -nsteps {nsteps} " + + "-s {input_path} {additional_args}", + use_mpi=True, + output_capture=OUTPUT_CAPTURE.ALL, + ) + + input_file( + "water_gmx50_bare", + url="https://ftp.gromacs.org/pub/benchmarks/water_GMX50_bare.tar.gz", + sha256="2219c10acb97787f80f6638132bad3ff2ca1e68600eef1bc8b89d9560e74c66a", + description="", + ) + 
input_file( + "water_bare_hbonds", + url="https://ftp.gromacs.org/pub/benchmarks/water_bare_hbonds.tar.gz", + sha256="b2e09d30f5c6b00ecf1c13ea6fa715ad132747863ef89f983f6c09a872cf2776", + description="", + ) + input_file( + "lignocellulose", + url="https://repository.prace-ri.eu/ueabs/GROMACS/1.2/GROMACS_TestCaseB.tar.gz", + sha256="8a12db0232465e1d47c6a4eb89f615cdbbdc8fc360a86088b131331bd462f35c", + description="A model of cellulose and lignocellulosic biomass in an aqueous " + + "solution. This system of 3.3M atoms is inhomogeneous, at " + + "least with GROMACS 4.5. This system uses reaction-field" + + "electrostatics instead of PME and therefore should scale well.", + ) + input_file( + "HECBioSim", + url="https://github.com/victorusu/GROMACS_Benchmark_Suite/archive/refs/tags/1.0.0.tar.gz", + sha256="9cb2ad61ec2a422fc33578047e7cb2fd2c37ae9a75a6162d662fa2b711e9737f", + description="https://www.hecbiosim.ac.uk/access-hpc/benchmarks", + ) + + input_file( + "BenchPEP", + url="https://www.mpinat.mpg.de/benchPEP.zip", + sha256="f11745201dbb9e6a29a39cb016ee8123f6b0f519b250c94660f0a9623e497b22", + description="12M Atoms, Peptides in Water, 2fs time step, all bonds constrained. https://www.mpinat.mpg.de/grubmueller/bench", + ) + + input_file( + "BenchPEP_h", + url="https://www.mpinat.mpg.de/benchPEP-h.zip", + sha256="3ca8902fd9a6cf005b266f83b57217397b4ba4af987b97dc01e04185bd098bce", + description="12M Atoms, Peptides in Water, 2fs time step, h-bonds constrained. https://www.mpinat.mpg.de/grubmueller/bench", + ) + + input_file( + "BenchMEM", + url="https://www.mpinat.mpg.de/benchMEM.zip", + sha256="3c1c8cd4f274d532f48c4668e1490d389486850d6b3b258dfad4581aa11380a4", + description="82k atoms, protein in membrane surrounded by water, 2 fs time step. 
https://www.mpinat.mpg.de/grubmueller/bench", + ) + + input_file( + "BenchRIB", + url="https://www.mpinat.mpg.de/benchRIB.zip", + sha256="39acb014a79ed9a9ff2ad6294a2c09f9b85ea6986dfc204a3639814503eeb60a", + description="2 M atoms, ribosome in water, 4 fs time step. https://www.mpinat.mpg.de/grubmueller/bench", + ) + + input_file( + "JCP_benchmarks", + url="https://zenodo.org/record/3893789/files/GROMACS_heterogeneous_parallelization_benchmark_info_and_systems_JCP.tar.gz?download=1", + sha256="82449291f44f4d5b7e5c192d688b57b7c2a2e267fe8b12e7a15b5d68f96c7b20", + description="GROMACS_heterogeneous_parallelization_benchmark_info_and_systems_JCP", + ) + + workload( + "water_gmx50", + executables=["pre-process", "execute-gen"], + input="water_gmx50_bare", + ) + workload( + "water_bare", + executables=["pre-process", "execute-gen"], + input="water_bare_hbonds", + ) + workload("lignocellulose", executables=["execute"], input="lignocellulose") + workload("hecbiosim", executables=["execute"], input="HECBioSim") + workload("benchpep", executables=["execute"], input="BenchPEP") + workload("benchpep_h", executables=["execute"], input="BenchPEP_h") + workload("benchmem", executables=["execute"], input="BenchMEM") + workload("benchrib", executables=["execute"], input="BenchRIB") + workload( + "stmv_rf", + executables=["pre-process", "execute-gen"], + input="JCP_benchmarks", + ) + workload( + "stmv_pme", + executables=["pre-process", "execute-gen"], + input="JCP_benchmarks", + ) + workload( + "rnase_cubic", + executables=["pre-process", "execute-gen"], + input="JCP_benchmarks", + ) + workload( + "ion_channel", + executables=["pre-process", "execute-gen"], + input="JCP_benchmarks", + ) + workload( + "adh_dodec", + executables=["pre-process", "execute-gen"], + input="JCP_benchmarks", + ) + + workload_group( + "all_workloads", + workloads=[ + "water_gmx50", + "water_bare", + "lignocellulose", + "hecbiosim", + "benchpep", + "benchpep_h", + "benchmem", + "benchrib", + "stmv_rf", + 
"stmv_pme", + "rnase_cubic", + "ion_channel", + "adh_dodec", + ], + ) + + workload_variable( + "single_rank_mpi_command", + default="", + description="Job scheduler command for running on a single mpi rank", + workload_group="all_workloads", + ) + + workload_variable( + "additional_args", + default="", + description="Additiaonl Exec Args", + workload_group="all_workloads", + ) + workload_variable( + "gmx", + default="gmx_mpi", + description="Name of the gromacs binary", + workload_group="all_workloads", + ) + workload_variable( + "grompp", + default="{single_rank_mpi_command} {gmx} grompp", + description="How to run grompp", + workload_group="all_workloads", + ) + workload_variable( + "mdrun", + default="{gmx} mdrun", + description="How to run mdrun", + workload_group="all_workloads", + ) + workload_variable( + "nsteps", + default=str(20000), + description="Simulation steps", + workload_group="all_workloads", + ) + workload_variable( + "resetstep", + default="{str(int(0.9*{nsteps}))}", + description="Reset performance counters at this step", + workload_group="all_workloads", + ) + workload_variable( + "verbose", + default="", + values=["", "-v"], + description="Set to empty string to run without verbose mode", + workload_group="all_workloads", + ) + workload_variable( + "notunepme", + default="-notunepme", + values=["", "-notunepme"], + description="Whether to set -notunepme for mdrun", + workload_group="all_workloads", + ) + workload_variable( + "dlb", + default="yes", + values=["yes", "no", "auto"], + description="Whether to use dynamic load balancing for mdrun", + workload_group="all_workloads", + ) + + workload_variable( + "size", + default="1536", + values=[ + "0000.65", + "0000.96", + "0001.5", + "0003", + "0006", + "0012", + "0024", + "0048", + "0096", + "0192", + "0384", + "0768", + "1536", + "3072", + ], + description="Workload size", + workloads=["water_gmx50", "water_bare"], + expandable=False, + ) + workload_variable( + "type", + default="pme", + 
description="Workload type.", + values=["pme", "rf"], + workloads=["water_gmx50", "water_bare"], + ) + workload_variable( + "input_path", + default="{water_gmx50_bare}/{size}", + description="Input path for water GMX50", + workload="water_gmx50", + ) + workload_variable( + "input_path", + default="{water_bare_hbonds}/{size}", + description="Input path for water bare hbonds", + workload="water_bare", + ) + workload_variable( + "input_path", + default="{lignocellulose}/lignocellulose-rf.tpr", + description="Input path for lignocellulose", + workload="lignocellulose", + ) + workload_variable( + "type", + default="Crambin", + description="Workload type. Valid values are " + "Crambin" + ", " + "Glutamine-Binding-Protein" + ", " + "hEGFRDimer" + ", " + "hEGFRDimerPair" + ", " + "hEGFRDimerSmallerPL" + ", " + "hEGFRtetramerPair" + "", + workload="hecbiosim", + ) + workload_variable( + "input_path", + default="{HECBioSim}/HECBioSim/{type}/benchmark.tpr", + description="Input path for hecbiosim", + workload="hecbiosim", + ) + workload_variable( + "input_path", + default="{BenchPEP}/benchPEP.tpr", + description="Input path for Bench PEP workload", + workload="benchpep", + ) + workload_variable( + "input_path", + default="{BenchMEM}/benchMEM.tpr", + description="Input path for Bench MEM workload", + workload="benchmem", + ) + workload_variable( + "input_path", + default="{BenchRIB}/benchRIB.tpr", + description="Input path for Bench RIB workload", + workload="benchrib", + ) + workload_variable( + "input_path", + default="{BenchPEP_h}/benchPEP-h.tpr", + description="Input path for Bench PEP-h workload", + workload="benchpep_h", + ) + workload_variable( + "type", + default="rf_nvt", + description="Workload type for JCP_benchmarks", + workload="stmv_rf", + ) + workload_variable( + "type", + default="pme_nvt", + description="Workload type for JCP_benchmarks", + workload="stmv_pme", + ) + workload_variable( + "type", + default="grompp", + description="Workload type for 
JCP_benchmarks", + workloads=["ion_channel", "rnase_cubic"], + ) + workload_variable( + "input_path", + default="{JCP_benchmarks}/stmv", + description="Input path for JCP_benchmark {workload_name}", + workloads=["stmv_rf", "stmv_pme"], + ) + workload_variable( + "input_path", + default="{JCP_benchmarks}/{workload_name}", + description="Input path for JCP_benchmark {workload_name}", + workloads=["ion_channel", "rnase_cubic"], + ) + workload_variable( + "input_path", + default="{JCP_benchmarks}/{workload_name}", + description="Input path for JCP_benchmark {workload_name}", + workloads=["adh_dodec"], + ) + workload_variable( + "type", + default="pme_verlet", + description="Workload type for JCP_benchmarks", + workloads=["adh_dodec"], + ) + + log_str = os.path.join( + Expander.expansion_str("experiment_run_dir"), "md.log" + ) + + figure_of_merit( + "Core Time", + log_file=log_str, + fom_regex=r"\s+Time:\s+(?P[0-9]+\.[0-9]+)", + group_name="core_time", + units="s", + fom_type=FomType.TIME, + ) + + figure_of_merit( + "Wall Time", + log_file=log_str, + fom_regex=r"\s+Time:\s+[0-9]+\.[0-9]+\s+" + + r"(?P[0-9]+\.[0-9]+)", + group_name="wall_time", + units="s", + fom_type=FomType.TIME, + ) + + figure_of_merit( + "Percent Core Time", + log_file=log_str, + fom_regex=r"\s+Time:\s+[0-9]+\.[0-9]+\s+[0-9]+\.[0-9]+\s+" + + r"(?P[0-9]+\.[0-9]+)", + group_name="perc_core_time", + units="%", + fom_type=FomType.MEASURE, + ) + + figure_of_merit( + "Nanosecs per day", + log_file=log_str, + fom_regex=r"Performance:\s+" + r"(?P[0-9]+\.[0-9]+)", + group_name="ns_per_day", + units="ns/day", + fom_type=FomType.THROUGHPUT, + ) + + figure_of_merit( + "Hours per nanosec", + log_file=log_str, + fom_regex=r"Performance:\s+[0-9]+\.[0-9]+\s+" + + r"(?P[0-9]+\.[0-9]+)", + group_name="hours_per_ns", + units="hours/ns", + fom_type=FomType.INFO, + ) diff --git a/repo/gromacs/package.py b/repo/gromacs/package.py index 363532627..94c95cc0b 100644 --- a/repo/gromacs/package.py +++ b/repo/gromacs/package.py 
@@ -5,8 +5,6 @@ import os -import llnl.util.filesystem as fs - import spack.build_systems.cmake from spack.package import * @@ -27,64 +25,112 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): url = "https://ftp.gromacs.org/gromacs/gromacs-2022.2.tar.gz" list_url = "https://ftp.gromacs.org/gromacs" git = "https://gitlab.com/gromacs/gromacs.git" - maintainers("danielahlin", "eirrgang", "junghans") - - license("BSD-2-Clause") + maintainers("mabraham", "eirrgang", "junghans") + + license("GPL-2.0-or-later", when="@:4.5") + license("LGPL-2.1-or-later", when="@4.6:") + + # Deprecation policy: + # + # GROMACS makes an annual major release and supports it with fixes + # in minor updates for about two years. Each such annual release + # series is supported in spack for those two years, then marked as + # deprecated in Spack. Deprecated versions can be removed after + # the next major release of GROMACS is supported in Spack. Users + # needing such an old version can either do a manual installation + # or get an older version of Spack. + # + # Exception: Version 2019.6 is the last version capable of tabulated + # interactions used in the so-called "group scheme." It will be marked + # as deprecated only after equivalent functionality is available in + # a major release of GROMACS, then removed as above. + # + # Exception: Otherwise, versions before 2022 will be removed when + # 2025 is supported. 
version("main", branch="main") version("master", branch="main", deprecated=True) + version("2025.2", sha256="0df09f9d45a99ef00e66b9baa9493a27e906813763a3b6c7672217c66b43ea11") + version("2025.1", sha256="0adf621a80fd8043f8defec84ce02811c0cdf42a052232890932d81f25c4d28a") + version("2025.0", sha256="a27ad35a646295bbec129abe684d9d03d1e2e0bd76b0d625e9055746aaefae82") + version("2024.5", sha256="fecf06b186cddb942cfb42ee8da5f3eb2b9993e6acc0a2f18d14ac0b014424f3") + version("2024.4", sha256="ac618ece2e58afa86b536c5a2c4fcb937f0760318f12d18f10346b6bdebd86a8") + version("2024.3", sha256="bbda056ee59390be7d58d84c13a9ec0d4e3635617adf2eb747034922cba1f029") + version("2024.2", sha256="802a7e335f2e895770f57b159e4ec368ebb0ff2ce6daccf706c6e8025c36852b") + version("2024.1", sha256="937d8f12a36fffbf2af7add71adbb5aa5c5537892d46c9a76afbecab1aa0aac7") version("2024", sha256="04d226d52066a8bc3a42e00d6213de737b4ec292e26703065924ff01956801e2") - version("2023.4", sha256="e5d6c4d9e7ccacfaccb0888619bd21b5ea8911f82b410e68d6db5d40f695f231") - version("2023.3", sha256="4ec8f8d0c7af76b13f8fd16db8e2c120e749de439ae9554d9f653f812d78d1cb") - version("2023.2", sha256="bce1480727e4b2bb900413b75d99a3266f3507877da4f5b2d491df798f9fcdae") - version("2023.1", sha256="eef2bb4a6cb6314cf9da47f26df2a0d27af4bf7b3099723d43601073ab0a42f4") - version("2023", sha256="ac92c6da72fbbcca414fd8a8d979e56ecf17c4c1cdabed2da5cfb4e7277b7ba8") - version("2022.6", sha256="75d277138475679dd3e334e384a71516570cde767310476687f2a5b72333ea41") - version("2022.5", sha256="083cc3c424bb93ffe86c12f952e3e5b4e6c9f6520de5338761f24b75e018c223") - version("2022.4", sha256="c511be602ff29402065b50906841def98752639b92a95f1b0a1060d9b5e27297") - version("2022.3", sha256="14cfb130ddaf8f759a3af643c04f5a0d0d32b09bc3448b16afa5b617f5e35dae") - version("2022.2", sha256="656404f884d2fa2244c97d2a5b92af148d0dbea94ad13004724b3fcbf45e01bf") - version("2022.1", sha256="85ddab5197d79524a702c4959c2c43be875e0fc471df3a35224939dce8512450") - version("2022", 
sha256="fad60d606c02e6164018692c6c9f2c159a9130c2bf32e8c5f4f1b6ba2dda2b68") - version("2021.7", sha256="4db7bbbfe5424de48373686ec0e8c5bfa7175d5cd74290ef1c1e840e6df67f06") - version("2021.6", sha256="52df2c1d7586fd028d9397985c68bd6dd26e6e905ead382b7e6c473d087902c3") - version("2021.5", sha256="eba63fe6106812f72711ef7f76447b12dd1ee6c81b3d8d4d0e3098cd9ea009b6") - version("2021.4", sha256="cb708a3e3e83abef5ba475fdb62ef8d42ce8868d68f52dafdb6702dc9742ba1d") - version("2021.3", sha256="e109856ec444768dfbde41f3059e3123abdb8fe56ca33b1a83f31ed4575a1cc6") - version("2021.2", sha256="d940d865ea91e78318043e71f229ce80d32b0dc578d64ee5aa2b1a4be801aadb") - version("2021.1", sha256="bc1d0a75c134e1fb003202262fe10d3d32c59bbb40d714bc3e5015c71effe1e5") - version("2021", sha256="efa78ab8409b0f5bf0fbca174fb8fbcf012815326b5c71a9d7c385cde9a8f87b") - version("2020.7", sha256="744158d8f61b0d36ffe89ec934519b7e0981a7af438897740160da648d36c2f0") - version("2020.6", sha256="d8bbe57ed3c9925a8cb99ecfe39e217f930bed47d5268a9e42b33da544bdb2ee") - version("2020.5", sha256="7b6aff647f7c8ee1bf12204d02cef7c55f44402a73195bd5f42cf11850616478") - version("2020.4", sha256="5519690321b5500c7951aaf53ff624042c3edd1a5f5d6dd1f2d802a3ecdbf4e6") - version("2020.3", sha256="903183691132db14e55b011305db4b6f4901cc4912d2c56c131edfef18cc92a9") - version("2020.2", sha256="7465e4cd616359d84489d919ec9e4b1aaf51f0a4296e693c249e83411b7bd2f3") - version("2020.1", sha256="e1666558831a3951c02b81000842223698016922806a8ce152e8f616e29899cf") - version("2020", sha256="477e56142b3dcd9cb61b8f67b24a55760b04d1655e8684f979a75a5eec40ba01") + version( + "2023.5", + sha256="9cc491d3601a5fe0ec0de727e4432c34877f596fe8a463d4cf0f0f53fb34d08b", + deprecated=True, + ) + version( + "2023.4", + sha256="e5d6c4d9e7ccacfaccb0888619bd21b5ea8911f82b410e68d6db5d40f695f231", + deprecated=True, + ) + version( + "2023.3", + sha256="4ec8f8d0c7af76b13f8fd16db8e2c120e749de439ae9554d9f653f812d78d1cb", + deprecated=True, + ) + version( + "2023.2", + 
sha256="bce1480727e4b2bb900413b75d99a3266f3507877da4f5b2d491df798f9fcdae", + deprecated=True, + ) + version( + "2023.1", + sha256="eef2bb4a6cb6314cf9da47f26df2a0d27af4bf7b3099723d43601073ab0a42f4", + deprecated=True, + ) + version( + "2023", + sha256="ac92c6da72fbbcca414fd8a8d979e56ecf17c4c1cdabed2da5cfb4e7277b7ba8", + deprecated=True, + ) + version( + "2022.6", + sha256="75d277138475679dd3e334e384a71516570cde767310476687f2a5b72333ea41", + deprecated=True, + ) + version( + "2022.5", + sha256="083cc3c424bb93ffe86c12f952e3e5b4e6c9f6520de5338761f24b75e018c223", + deprecated=True, + ) + version( + "2022.4", + sha256="c511be602ff29402065b50906841def98752639b92a95f1b0a1060d9b5e27297", + deprecated=True, + ) + version( + "2022.3", + sha256="14cfb130ddaf8f759a3af643c04f5a0d0d32b09bc3448b16afa5b617f5e35dae", + deprecated=True, + ) + version( + "2022.2", + sha256="656404f884d2fa2244c97d2a5b92af148d0dbea94ad13004724b3fcbf45e01bf", + deprecated=True, + ) + version( + "2022.1", + sha256="85ddab5197d79524a702c4959c2c43be875e0fc471df3a35224939dce8512450", + deprecated=True, + ) + version( + "2022", + sha256="fad60d606c02e6164018692c6c9f2c159a9130c2bf32e8c5f4f1b6ba2dda2b68", + deprecated=True, + ) + # See exception documented above version("2019.6", sha256="bebe396dc0db11a9d4cc205abc13b50d88225617642508168a2195324f06a358") - version("2019.5", sha256="438061a4a2d45bbb5cf5c3aadd6c6df32d2d77ce8c715f1c8ffe56156994083a") - version("2019.4", sha256="ba4366eedfc8a1dbf6bddcef190be8cd75de53691133f305a7f9c296e5ca1867") - version("2019.3", sha256="4211a598bf3b7aca2b14ad991448947da9032566f13239b1a05a2d4824357573") - version("2019.2", sha256="bcbf5cc071926bc67baa5be6fb04f0986a2b107e1573e15fadcb7d7fc4fb9f7e") - version("2019.1", sha256="b2c37ed2fcd0e64c4efcabdc8ee581143986527192e6e647a197c76d9c4583ec") - version("2019", sha256="c5b281a5f0b5b4eeb1f4c7d4dc72f96985b566561ca28acc9c7c16f6ee110d0b") - version("2018.8", sha256="776923415df4bc78869d7f387c834141fdcda930b2e75be979dc59ecfa6ebecf") - 
version("2018.5", sha256="32261df6f7ec4149fc0508f9af416953d056e281590359838c1ed6644ba097b8") - version("2018.4", sha256="6f2ee458c730994a8549d6b4f601ecfc9432731462f8bd4ffa35d330d9aaa891") - version("2018.3", sha256="4423a49224972969c52af7b1f151579cea6ab52148d8d7cbae28c183520aa291") - version("2018.2", sha256="4bdde8120c510b6543afb4b18f82551fddb11851f7edbd814aa24022c5d37857") - version("2018.1", sha256="4d3533340499323fece83b4a2d4251fa856376f2426c541e00b8e6b4c0d705cd") - version("2018", sha256="deb5d0b749a52a0c6083367b5f50a99e08003208d81954fb49e7009e1b1fd0e9") - version("2016.6", sha256="bac0117d2cad21f9b94fe5b854fb9ae7435b098a6da4e732ee745f18e52473d7") - version("2016.5", sha256="57db26c6d9af84710a1e0c47a1f5bf63a22641456448dcd2eeb556ebd14e0b7c") - version("2016.4", sha256="4be9d3bfda0bdf3b5c53041e0b8344f7d22b75128759d9bfa9442fe65c289264") - version("2016.3", sha256="7bf00e74a9d38b7cef9356141d20e4ba9387289cbbfd4d11be479ef932d77d27") - version("5.1.5", sha256="c25266abf07690ecad16ed3996899b1d489cbb1ef733a1befb3b5c75c91a703e") - version("5.1.4", sha256="0f3793d8f1f0be747cf9ebb0b588fb2b2b5dc5acc32c3046a7bee2d2c03437bc") - version("5.1.2", sha256="39d6f1d7ae8ba38cea6089da40676bfa4049a49903d21551abc030992a58f304") - version("4.6.7", sha256="6afb1837e363192043de34b188ca3cf83db6bd189601f2001a1fc5b0b2a214d9") - version("4.5.5", sha256="e0605e4810b0d552a8761fef5540c545beeaf85893f4a6e21df9905a33f871ba") + + depends_on("c", type="build") + depends_on("cxx", type="build") + depends_on("fortran", type="build", when="@:4.5.5") # No core Fortran code since 4.6 + depends_on("fortran", type="build", when="+cp2k") # Need Fortran compiler for CP2K variant( "mpi", default=True, description="Activate MPI support (disable for Thread-MPI support)" @@ -93,7 +139,7 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): # on: turn on, but allow groamcs to disable it if GPU-aware MPI is not supported # force: turn on and force gromacs to use GPU-aware MPI. 
May result in error if unsupported variant( - "gpu-aware-mpi", + "direct-gpu-comm", default="on", values=("on", "off", "force"), when="@2021: +mpi", @@ -111,16 +157,36 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): when="@2022: +cuda+mpi", description="Enable multi-GPU FFT support with cuFFTMp", ) - variant( "heffte", default=False, - when="@2021: +sycl+mpi", + when="@2021: +mpi", description="Enable multi-GPU FFT support with HeFFTe", ) + depends_on("heffte +cuda", when="+heffte +cuda") + depends_on("heffte +sycl", when="+heffte +sycl") variant("opencl", default=False, description="Enable OpenCL support") variant("sycl", default=False, when="@2021:", description="Enable SYCL support") - variant("sycl", default=True, when="@2021: +rocm", description="Enable SYCL support when using ROCm") + + with when("+rocm"): + depends_on("hip") + depends_on("rocfft") + with when("+sycl"): + depends_on("hipsycl") + depends_on(f"hipsycl@24.10.0:", when=f"@2025:") + for target in ("none", "gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx942"): + depends_on(f"hipsycl +rocm amdgpu_target={target}", when=f"+rocm amdgpu_target={target}") + + requires( + "^intel-oneapi-runtime", + "^hipsycl %clang", + "^hipsycl %rocmcc", + policy="one_of", + when="+sycl", + msg="GROMACS SYCL support comes either from intel-oneapi-runtime or a " + + "package that provides the virtual package `sycl`, such as AdaptiveCpp " + + "plus a clang compiler.", + ) variant( "intel-data-center-gpu-max", default=False, @@ -163,7 +229,25 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): conflicts( "+mdrun_only", when="@2021:", msg="mdrun-only build option was removed for GROMACS 2021." 
) + variant( + "nvshmem", + default=False, + when="@2024:+mpi+cuda", + description="Enable NVSHMEM support for Nvidia GPUs", + ) + conflicts( + "+nvshmem", + when="+cufftmp", + msg=( + "The GROMACS support for NVSHMEM does not work with the GROMACS support " + "for cuFFTMp (even though cuFFTMp uses NVSHMEM in its implementation)" + ), + ) + variant("openmp", default=True, description="Enables OpenMP at configure time") + conflicts( + "+openmp", when="%apple-clang", msg="OpenMP not available for the Apple clang compiler" + ) variant("openmp_max_threads", default="none", description="Max number of OpenMP threads") conflicts( "+openmp_max_threads", when="~openmp", msg="OpenMP is off but OpenMP Max threads is set" @@ -172,7 +256,7 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): "sve", default=True, description="Enable SVE on aarch64 if available", - when="target=neoverse_v1", + when="target=neoverse_v1:,neoverse_v2:,neoverse_n2:", ) variant( "sve", default=True, description="Enable SVE on aarch64 if available", when="target=a64fx" @@ -236,16 +320,21 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): # Above dependencies can be verified, and new versions added, by going to # https://github.com/plumed/plumed2/tree/v2.9.0/patches # and switching tags. 
+ + # Versions without minor release number, such as `2023` and `2021`, + # require exact specification using `@=`, starting from Spack v0.20.0, + # see https://github.com/spack/spack/releases/tag/v0.20.0 + plumed_patches = { - "2023": "2.9.0", - "2022.5": "2.8.2:2.9.0", + "=2023": "2.9.1", + "2022.5": "2.8.2:2.9.1", "2022.3": "2.8.1", - "2021.7": "2.8.2:2.9.0", + "2021.7": "2.8.2:2.9.1", "2021.6": "2.8.1", "2021.5": "2.7.5:2.7.6", "2021.4": "2.7.3:2.8.0", - "2021": "2.7.1:2.7.2", - "2020.7": "2.8.1:2.9.0", + "=2021": "2.7.1:2.7.2", + "2020.7": "2.8.1:2.9.1", "2020.6": "2.7.2:2.8.0", "2020.5": "2.7.1", "2020.4": "2.6.2:2.7.0", @@ -275,32 +364,42 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): variant( "intel_provided_gcc", default=False, - description="Use this if Intel compiler is installed through spack." + description="Use this if Intel compiler is installed through spack. " + "The g++ location is written to icp{c,x}.cfg", ) + variant( + "itt", + default=False, + when="@2024:", + description="Enable Instrumentation and Tracing Technology (ITT)" + + " profiling API (from Intel)", + ) + depends_on("intel-oneapi-vtune", "+itt") + depends_on("fftw-api@3") depends_on("cmake@2.8.8:3", type="build") depends_on("cmake@3.4.3:3", type="build", when="@2018:") depends_on("cmake@3.9.6:3", type="build", when="@2020") depends_on("cmake@3.13.0:3", type="build", when="@2021") depends_on("cmake@3.16.3:3", type="build", when="@2022:") + depends_on("cmake@3.28:", type="build", when="@2025:") depends_on("cmake@3.18.4:3", type="build", when="@main") depends_on("cmake@3.16.0:3", type="build", when="%fj") - depends_on("cuda", when="+cuda") - - for target in ("none", "gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx942"): - requires(f"^hipsycl@23.10.0+rocm amdgpu_target={target}", when=f"gromacs@2024+rocm amdgpu_target={target}") - - with when("+rocm"): - depends_on("sycl") - depends_on("hip") - depends_on("rocfft") + depends_on("pkgconfig", type="build") + 
depends_on("cuda", when="+cuda") + depends_on("sycl", when="+sycl") depends_on("lapack") depends_on("blas") - depends_on("gcc", when="%oneapi ~intel_provided_gcc") - depends_on("gcc", when="%intel ~intel_provided_gcc") + depends_on("gcc", when="~intel_provided_gcc %intel") + # TODO this can be expanded to all clang-based compilers once + # the principle is demonstrated to work + with when("~intel_provided_gcc %oneapi"): + depends_on("gcc-runtime@5:", when="@2020") + depends_on("gcc-runtime@7:", when="@2021:2022") + depends_on("gcc-runtime@9:", when="@2023:2024") + depends_on("gcc-runtime@11:", when="@2025:") depends_on("hwloc@1.0:1", when="+hwloc@2016:2018") depends_on("hwloc", when="+hwloc@2019:") @@ -308,8 +407,11 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): depends_on("cp2k@8.1:", when="+cp2k") depends_on("nvhpc", when="+cufftmp") + depends_on("nvhpc", when="+nvshmem") depends_on("heffte", when="+heffte") + conflicts("cmake@:3.27", when="@:2024") + requires( "%intel", "%oneapi", @@ -319,9 +421,8 @@ class Gromacs(CMakePackage, CudaPackage, ROCmPackage): ) # If the Intel suite is used for Lapack, it must be used for fftw and vice-versa - for _intel_pkg in INTEL_MATH_LIBRARIES: - requires(f"^[virtuals=fftw-api] {_intel_pkg}", when=f"^[virtuals=lapack] {_intel_pkg}") - requires(f"^[virtuals=lapack] {_intel_pkg}", when=f"^[virtuals=fftw-api] {_intel_pkg}") + requires("^[virtuals=fftw-api] intel-oneapi-mkl", when="^[virtuals=lapack] intel-oneapi-mkl") + requires("^[virtuals=lapack] intel-oneapi-mkl", when="^[virtuals=fftw-api] intel-oneapi-mkl") patch("gmxDetectCpu-cmake-3.14.patch", when="@2018:2019.3^cmake@3.14.0:") patch("gmxDetectSimd-cmake-3.14.patch", when="@5.0:2017^cmake@3.14.0:") @@ -373,14 +474,14 @@ def patch(self): string=True, ) - if "+plumed" in self.spec: - self.spec["plumed"].package.apply_patch(self) + if self.spec.satisfies("+plumed"): + self["plumed"].apply_patch(self) if self.spec.satisfies("%nvhpc"): # Disable obsolete workaround 
filter_file("ifdef __PGI", "if 0", "src/gromacs/fileio/xdrf.h") - if "+cuda" in self.spec: + if self.spec.satisfies("+cuda"): # Upstream supports building of last two major versions of Gromacs. # Older versions of Gromacs need to be patched to build with more recent # versions of CUDA library. @@ -420,11 +521,23 @@ def setup_run_environment(self, env): if self.compiler.extra_rpaths: for rpath in self.compiler.extra_rpaths: env.prepend_path("LD_LIBRARY_PATH", rpath) + if self.spec.satisfies("+cufftmp"): + env.append_path( + "LD_LIBRARY_PATH", + join_path( + self.spec["nvhpc"].prefix, + f"Linux_{self.spec.target.family}", + self.spec["nvhpc"].version, + "comm_libs", + "nvshmem", + "lib", + ), + ) + if "force" in self.spec.variants["direct-gpu-comm"].value: + env.set("GMX_FORCE_GPU_AWARE_MPI", "1") + elif "off" in self.spec.variants["direct-gpu-comm"].value: + env.set("GMX_DISABLE_DIRECT_GPU_COMM", "1") - def setup_build_environment(self, env): - if self.compiler.extra_rpaths: - for rpath in self.compiler.extra_rpaths: - env.prepend_path("LD_LIBRARY_PATH", rpath) class CMakeBuilder(spack.build_systems.cmake.CMakeBuilder): @run_after("build") @@ -444,7 +557,7 @@ def build_test_binaries(self): not be intended with ``--test``. """ if self.pkg.run_tests: - with fs.working_dir(self.build_directory): + with working_dir(self.build_directory): make("tests") def check(self): @@ -453,7 +566,7 @@ def check(self): Override the standard CMakeBuilder behavior. GROMACS has both `test` and `check` targets, but we are only interested in the latter. """ - with fs.working_dir(self.build_directory): + with working_dir(self.build_directory): if self.generator == "Unix Makefiles": make("check") elif self.generator == "Ninja": @@ -470,23 +583,7 @@ def cmake_args(self): # In other words, the mapping between package variants and the # GMX CMake variables is often non-trivial. 
- gmx_cc = spack_cc - gmx_cxx = spack_cxx - if "+rocm" in self.spec: - # The ROCm version requires the ROCm LLVM installation - gmx_cc = os.path.join(self.spec["llvm"].prefix.bin, "clang") - gmx_cxx = os.path.join(self.spec["llvm"].prefix.bin, "clang++") - if not fs.is_exe(gmx_cc) or not fs.is_exe(gmx_cxx): - gmx_cc = path.join(self.spec["llvm"].prefix.bin, "amdclang") - gmx_cxx = path.join(self.spec["llvm"].prefix.bin, "amdclang++") - if not fs.is_exe(gmx_cc) or not fs.is_exe(gmx_cxx): - raise InstallError( - "concretized LLVM dependency must provide a " - "valid clang/amdclang executable, found invalid: " - "{0}/{1}".format(gmx_cc, gmx_cxx) - ) - - if "+mpi" in self.spec: + if self.spec.satisfies("+mpi"): options.append("-DGMX_MPI:BOOL=ON") if self.pkg.version < Version("2020"): # Ensures gmxapi builds properly @@ -509,41 +606,23 @@ def cmake_args(self): else: options.extend( [ - "-DCMAKE_C_COMPILER=%s" % gmx_cc, - "-DCMAKE_CXX_COMPILER=%s" % gmx_cxx, + "-DCMAKE_C_COMPILER=%s" % spack_cc, + "-DCMAKE_CXX_COMPILER=%s" % spack_cxx, "-DMPI_C_COMPILER=%s" % self.spec["mpi"].mpicc, "-DMPI_CXX_COMPILER=%s" % self.spec["mpi"].mpicxx, ] ) - if 'on' in self.spec.variants['gpu-aware-mpi'].value: - options.extend( - [ - "-DGMX_ENABLE_DIRECT_GPU_COMM=ON", - "-DGMX_FORCE_GPU_AWARE_MPI=OFF", - ] - ) - elif 'force' in self.spec.variants['gpu-aware-mpi'].value: - options.extend( - [ - "-DGMX_ENABLE_DIRECT_GPU_COMM=ON", - "-DGMX_FORCE_GPU_AWARE_MPI=ON", - ] - ) options.extend([f"-DMPI_CXX_LINK_FLAGS='{self.spec['mpi'].libs.ld_flags}'"]) else: options.extend( [ - "-DCMAKE_C_COMPILER=%s" % gmx_cc, - "-DCMAKE_CXX_COMPILER=%s" % gmx_cxx, + "-DCMAKE_C_COMPILER=%s" % spack_cc, + "-DCMAKE_CXX_COMPILER=%s" % spack_cxx, "-DGMX_MPI:BOOL=OFF", "-DGMX_THREAD_MPI:BOOL=ON", ] ) - # Here we cannot use spack_cc because we need also libstdc++ to be reachable - # Spack wrapper (spack_cc) hides includes/lib and CMake will fail - options.append("-DGMX_GPLUSPLUS_PATH=%s" % self.pkg.compiler.cxx) - if 
self.spec.satisfies("%aocc"): options.append("-DCMAKE_CXX_FLAGS=--stdlib=libc++") @@ -558,49 +637,59 @@ def cmake_args(self): ): with open(".".join([os.environ["SPACK_CXX"], "cfg"]), "r") as f: options.append("-DCMAKE_CXX_FLAGS={}".format(f.read())) - else: + elif self.spec.satisfies("^gcc"): options.append("-DGMX_GPLUSPLUS_PATH=%s/g++" % self.spec["gcc"].prefix.bin) - if "+double" in self.spec: + if self.spec.satisfies("+double"): options.append("-DGMX_DOUBLE:BOOL=ON") - if "+nosuffix" in self.spec: + if self.spec.satisfies("+nosuffix"): options.append("-DGMX_DEFAULT_SUFFIX:BOOL=OFF") - if "~shared" in self.spec: + if self.spec.satisfies("~shared"): options.append("-DBUILD_SHARED_LIBS:BOOL=OFF") options.append("-DGMXAPI:BOOL=OFF") - if "+hwloc" in self.spec: + if self.spec.satisfies("+hwloc"): options.append("-DGMX_HWLOC:BOOL=ON") else: options.append("-DGMX_HWLOC:BOOL=OFF") if self.pkg.version >= Version("2021"): - if "+cuda" in self.spec: + if self.spec.satisfies("+cuda"): options.append("-DGMX_GPU:STRING=CUDA") - elif "+opencl" in self.spec: + elif self.spec.satisfies("+opencl"): options.append("-DGMX_GPU:STRING=OpenCL") - elif "+sycl" in self.spec or "+rocm" in self.spec: + elif self.spec.satisfies("+sycl"): options.append("-DGMX_GPU:STRING=SYCL") - if "+rocm" in self.spec: + if self.spec.satisfies("+rocm"): options.append("-DGMX_SYCL_HIPSYCL:BOOL=ON") hipsycl_dir = os.path.join(self.spec["sycl"].prefix.lib, "cmake/hipSYCL/") options.append(f"-Dhipsycl_DIR:STRING={hipsycl_dir}") rocm_archs = ",".join(self.spec.variants["amdgpu_target"].value) options.append(f"-DHIPSYCL_TARGETS:STRING=hip:{rocm_archs}") + options.append("-DGMX_SYCL=ACPP") + options.append(f"-DACPP_TARGETS=hip:{rocm_archs}") + elif self.spec.satisfies("+rocm") and self.spec.satisfies("~sycl"): + rocm_archs = ",".join(self.spec.variants["amdgpu_target"].value) + options.append(f"-DCMAKE_HIP_COMPILER={self.spec['hip'].prefix.bin}/amdclang++") + options.append("-DGMX_GPU=HIP") + 
options.append(f"-DGMX_HIP_TARGET_ARCH={rocm_archs}") else: options.append("-DGMX_GPU:STRING=OFF") else: - if "+cuda" in self.spec or "+opencl" in self.spec: + if self.spec.satisfies("+cuda") or self.spec.satisfies("+opencl"): options.append("-DGMX_GPU:BOOL=ON") - if "+opencl" in self.spec: + if self.spec.satisfies("+opencl"): options.append("-DGMX_USE_OPENCL=ON") else: options.append("-DGMX_GPU:BOOL=OFF") - if "+cuda" in self.spec: + if self.spec.satisfies("+cuda"): options.append("-DCUDA_TOOLKIT_ROOT_DIR:STRING=" + self.spec["cuda"].prefix) + if not self.spec.satisfies("cuda_arch=none"): + cuda_arch = self.spec.variants["cuda_arch"].value + options.append(f"-DGMX_CUDA_TARGET_SM:STRING={';'.join(cuda_arch)}") target = self.spec.target if target.family == "ppc64le": @@ -617,28 +706,35 @@ def cmake_args(self): if self.spec["blas"].libs: options.append("-DGMX_BLAS_USER={0}".format(self.spec["blas"].libs.joined(";"))) - if "+cp2k" in self.spec: + if self.spec.satisfies("+cp2k"): options.append("-DGMX_CP2K:BOOL=ON") options.append("-DCP2K_DIR:STRING={0}".format(self.spec["cp2k"].prefix)) - if "+cufftmp" in self.spec: + if self.spec.satisfies("+cufftmp"): options.append("-DGMX_USE_CUFFTMP=ON") options.append( f'-DcuFFTMp_ROOT={self.spec["nvhpc"].prefix}/Linux_{self.spec.target.family}' + f'/{self.spec["nvhpc"].version}/math_libs' ) - if "+heffte" in self.spec: + if self.spec.satisfies("+heffte"): options.append("-DGMX_USE_HEFFTE=on") options.append(f'-DHeffte_ROOT={self.spec["heffte"].prefix}') - if "+intel-data-center-gpu-max" in self.spec: + if self.spec.satisfies("+intel-data-center-gpu-max"): options.append("-DGMX_GPU_NB_CLUSTER_SIZE=8") options.append("-DGMX_GPU_NB_NUM_CLUSTER_PER_CELL_X=1") - if "~nblib" in self.spec: + if "+itt" in self.spec: + options.append("-DGMX_USE_ITT=on") + options.append( + "-DITTNOTIFY_INCLUDE_DIR=%s" + % self.spec["intel-oneapi-vtune"].package.headers.directories[0] + ) + + if self.spec.satisfies("~nblib"): 
options.append("-DGMX_INSTALL_NBLIB_API=OFF") - if "~gmxapi" in self.spec: + if self.spec.satisfies("~gmxapi"): options.append("-DGMXAPI=OFF") # Activate SIMD based on properties of the target @@ -716,7 +812,7 @@ def cmake_args(self): ) ) - if "+cycle_subcounters" in self.spec: + if self.spec.satisfies("+cycle_subcounters"): options.append("-DGMX_CYCLE_SUBCOUNTERS:BOOL=ON") else: options.append("-DGMX_CYCLE_SUBCOUNTERS:BOOL=OFF") @@ -725,10 +821,19 @@ def cmake_args(self): options.append( "-DGMX_OPENMP_MAX_THREADS=%s" % self.spec.variants["openmp_max_threads"].value ) + if self.spec.satisfies("+nvshmem"): + options.append("-DGMX_NVSHMEM:BOOL=ON") + nvshmem_root = join_path( + self.spec["nvhpc"].prefix, + f"Linux_{self.spec.target.family}", + self.spec["nvhpc"].version, + "comm_libs", + "nvshmem", + ) + options.append(f"-DNVSHMEM_ROOT={nvshmem_root}") - if self.spec["lapack"].name in INTEL_MATH_LIBRARIES: - # fftw-api@3 is provided by intel-mkl or intel-parllel-studio - # we use the mkl interface of gromacs + if self.spec.satisfies("^[virtuals=lapack] intel-oneapi-mkl"): + # fftw-api@3 is provided by intel-oneapi-mkl options.append("-DGMX_FFT_LIBRARY=mkl") if self.spec.satisfies("@:2022"): options.append( @@ -740,7 +845,7 @@ def cmake_args(self): else: # we rely on the fftw-api@3 options.append("-DGMX_FFT_LIBRARY=fftw3") - if "^amdfftw" in self.spec: + if self.spec.satisfies("^[virtuals=fftw-api] amdfftw"): options.append("-DGMX_FFT_LIBRARY=fftw3") options.append( "-DFFTWF_INCLUDE_DIRS={0}".format(self.spec["amdfftw"].headers.directories[0]) @@ -748,26 +853,47 @@ def cmake_args(self): options.append( "-DFFTWF_LIBRARIES={0}".format(self.spec["amdfftw"].libs.joined(";")) ) - elif "^armpl-gcc" in self.spec: + elif self.spec.satisfies("^armpl-gcc"): options.append( "-DFFTWF_INCLUDE_DIR={0}".format(self.spec["armpl-gcc"].headers.directories[0]) ) options.append( "-DFFTWF_LIBRARY={0}".format(self.spec["armpl-gcc"].libs.joined(";")) ) - elif "^acfl" in self.spec: + elif 
self.spec.satisfies("^acfl"): options.append( "-DFFTWF_INCLUDE_DIR={0}".format(self.spec["acfl"].headers.directories[0]) ) options.append("-DFFTWF_LIBRARY={0}".format(self.spec["acfl"].libs.joined(";"))) - elif self.pkg.version >= Version("2023") and "+rocm" in self.spec: + elif self.pkg.version >= Version("2023") and self.spec.satisfies("+rocm"): # Use ROCm FFT library options.append("-DGMX_GPU_FFT_LIBRARY=rocFFT") # Ensure that the GROMACS log files report how the code was patched # during the build, so that any problems are easier to diagnose. - if "+plumed" in self.spec: + if self.spec.satisfies("+plumed"): options.append("-DGMX_VERSION_STRING_OF_FORK=PLUMED-spack") else: options.append("-DGMX_VERSION_STRING_OF_FORK=spack") return options + + def setup_build_environment(self, env): + if self.pkg.compiler.extra_rpaths: + for rpath in self.pkg.compiler.extra_rpaths: + env.prepend_path("LD_LIBRARY_PATH", rpath) + if self.spec.satisfies("+cufftmp"): + env.append_path( + "LD_LIBRARY_PATH", + join_path( + self.spec["nvhpc"].prefix, + f"Linux_{self.spec.target.family}", + self.spec["nvhpc"].version, + "comm_libs", + "nvshmem", + "lib", + ), + ) + if "force" in self.spec.variants["direct-gpu-comm"].value: + env.set("GMX_FORCE_GPU_AWARE_MPI", "1") + elif "off" in self.spec.variants["direct-gpu-comm"].value: + env.set("GMX_DISABLE_DIRECT_GPU_COMM", "1") diff --git a/repo/hipsycl/package.py b/repo/hipsycl/package.py index 430cbe465..56a1d6c6a 100644 --- a/repo/hipsycl/package.py +++ b/repo/hipsycl/package.py @@ -4,41 +4,40 @@ # SPDX-License-Identifier: Apache-2.0 import json -from os import path - -from llnl.util import filesystem +import os from spack.package import * -class Hipsycl(CMakePackage, CudaPackage, ROCmPackage): +class Hipsycl(CMakePackage, ROCmPackage): """hipSYCL is an implementation of the SYCL standard programming model over NVIDIA CUDA/AMD HIP""" - homepage = "https://github.com/AdaptiveCpp/AdaptiveCpp" - url = 
"https://github.com/AdaptiveCpp/AdaptiveCpp/archive/v23.10.0.tar.gz" - git = "https://github.com/AdaptiveCpp/AdaptiveCpp.git" - - maintainers("nazavode") + homepage = "https://github.com/illuhad/hipSYCL" + url = "https://github.com/illuhad/hipSYCL/archive/v0.8.0.tar.gz" + git = "https://github.com/illuhad/hipSYCL.git" provides("sycl") license("BSD-2-Clause") version("stable", branch="stable", submodules=True) - # New name ACPP - version("23.10.0", sha256="9ac3567c048a848f4e6eadb15b09750357ae399896e802b2f1dcaecf8a090064") - version("0.9.4", sha256="d9269c814f5e07b54a58bcef177950f222e22127c8399edc2e627d6b9e250763") - version("0.9.3", sha256="6a2e2d81bd21209ad0726d5aa377321e177fecb775ad93078259835be0931f51") - version("0.9.2", sha256="4b2308eb19b978a8528d55fe8c9fbb18d5be51aa0dd1a18a068946d8ddedebb1") - version("0.9.1", sha256="f4adbe283d21272d5b5a1431650971c29360453c167b8c3f6ed32611322aa4b1") - version("0.9.0", sha256="a7c565055fcb88dbca73693d3497fc8118e4f65b435e9bf136098a06b19dd8fc") - version("0.8.0", sha256="18e1a14d2f5f86a9bb4b799818c086d017a7e397a5f58bb92463a90355951f44") + version("24.10.0", commit="7677cf6eefd8ab46d66168cd07ab042109448124", submodules=True) + version("24.06.0", commit="fc51dae9006d6858fc9c33148cc5f935bb56b075", submodules=True) + version("24.02.0", commit="974adc33ea5a35dd8b5be68c7a744b37482b8b64", submodules=True) + version("23.10.0", commit="3952b468c9da89edad9dff953cdcab0a3c3bf78c", submodules=True) + version("0.9.4", commit="99d9e24d462b35e815e0e59c1b611936c70464ae", submodules=True) + version("0.9.3", commit="51507bad524c33afe8b124804091b10fa25618dc", submodules=True) + version("0.9.2", commit="49fd02499841ae884c61c738610e58c27ab51fdb", submodules=True) + version("0.9.1", commit="fe8465cd5399a932f7221343c07c9942b0fe644c", submodules=True) + version("0.8.0", commit="2daf8407e49dd32ebd1c266e8e944e390d28b22a", submodules=True) version("develop", branch="develop", submodules=True) - variant("opencl", default=False, description="Enable OpenCL 
backend for SYCL kernels") - variant("sscp", default=False, description="Enable SSCP compiler") - variant("intel", default=False, description="Enable Intel Level Zero backend for SYCL kernels") + variant("cuda", default=False, description="Enable CUDA backend for SYCL kernels") + variant("rocm", default=False, description="Enable ROCM backend for SYCL kernels") + + depends_on("c", type="build") + depends_on("cxx", type="build") depends_on("cmake@3.5:", type="build") depends_on("boost +filesystem", when="@:0.8") @@ -46,17 +45,37 @@ class Hipsycl(CMakePackage, CudaPackage, ROCmPackage): depends_on("python@3:") depends_on("llvm@8: +clang", when="~cuda") depends_on("llvm@9: +clang", when="+cuda") + # hipSYCL 0.8.0 supported only LLVM 8-10: # (https://github.com/AdaptiveCpp/AdaptiveCpp/blob/v0.8.0/CMakeLists.txt#L29-L37) + # recent versions support only up to llvm18 + # https://github.com/spack/spack/issues/46681 + # https://github.com/spack/spack/issues/49506 + + # The following list was made based on the version tested in adaptivecpp github + depends_on("llvm@14:18", when="@develop") + depends_on("llvm@14:18", when="@stable") + + depends_on("llvm@14:18", when="@24.10.0") + depends_on("llvm@14:18", when="@24.06.0") + depends_on("llvm@13:17", when="@24.02.0") + depends_on("llvm@13:17", when="@23.10.0") + depends_on("llvm@11:15", when="@0.9.4") + depends_on("llvm@11:14", when="@0.9.3") + depends_on("llvm@11:13", when="@0.9.2") + depends_on("llvm@11", when="@0.9.1") + # depends_on("llvm@10:11", when="@0.9.0") # missing in releases depends_on("llvm@8:10", when="@0.8.0") + + # https://github.com/spack/spack/issues/45029 and https://github.com/spack/spack/issues/43142 + conflicts("^gcc@12", when="@23.10.0") # https://github.com/OpenSYCL/OpenSYCL/pull/918 was introduced after 0.9.4 - conflicts("^llvm@16:", when="@:0.9.4") + conflicts("^gcc@12.2.0", when="@:0.9.4") # LLVM PTX backend requires cuda7:10.1 (https://tinyurl.com/v82k5qq) depends_on("cuda@9:10.1", when="@0.8.1: 
+cuda ^llvm@9") depends_on("cuda@9:", when="@0.8.1: +cuda ^llvm@10:") # hipSYCL@:0.8.0 requires cuda@9:10.0 due to a known bug depends_on("cuda@9:10.0", when="@:0.8.0 +cuda") - depends_on("llvm@9: +clang", when="+rocm") conflicts( "%gcc@:4", @@ -80,32 +99,24 @@ def cmake_args(self): spec = self.spec args = [ "-DWITH_CPU_BACKEND:Bool=TRUE", - "-DWITH_ROCM_BACKEND:Bool={0}".format("TRUE" if "+rocm" in spec else "FALSE"), - "-DWITH_CUDA_BACKEND:Bool={0}".format("TRUE" if "+cuda" in spec else "FALSE"), - "-DWITH_LEVEL_ZERO_BACKEND:Bool={0}".format("TRUE" if "+intel" in spec else "FALSE"), - "-DWITH_OPENCL_BACKEND:Bool={0}".format("TRUE" if "+opencl" in spec else "FALSE"), - "-DWITH_SSCP_COMPILER:Bool={0}".format("TRUE" if "+sscp" in spec else "FALSE"), + "-DWITH_ROCM_BACKEND:Bool={0}".format("TRUE" if spec.satisfies("+rocm") else "FALSE"), + "-DWITH_CUDA_BACKEND:Bool={0}".format("TRUE" if spec.satisfies("+cuda") else "FALSE"), # prevent hipSYCL's cmake to look for other LLVM installations # if the specified one isn't compatible "-DDISABLE_LLVM_VERSION_CHECK:Bool=TRUE", ] - if self.version >= Version("23.10.0"): - args.append("-DACPP_VERSION_SUFFIX={0}".format(self.version)) # LLVM directory containing all installed CMake files # (e.g.: configs consumed by client projects) - llvm_cmake_dirs = filesystem.find(spec["llvm"].prefix.lib, "LLVMExports.cmake") + llvm_cmake_dirs = find(spec["llvm"].prefix, "LLVMExports.cmake") if len(llvm_cmake_dirs) != 1: raise InstallError( "concretized llvm dependency must provide " "a unique directory containing CMake client " "files, found: {0}".format(llvm_cmake_dirs) ) - args.append("-DLLVM_DIR:String={0}".format(path.dirname(llvm_cmake_dirs[0]))) - + args.append("-DLLVM_DIR:String={0}".format(os.path.dirname(llvm_cmake_dirs[0]))) # clang internal headers directory - llvm_clang_include_dirs = filesystem.find( - spec["llvm"].prefix.lib, "__clang_cuda_runtime_wrapper.h" - ) + llvm_clang_include_dirs = find(spec["llvm"].prefix, 
"__clang_cuda_runtime_wrapper.h") if len(llvm_clang_include_dirs) != 1: raise InstallError( "concretized llvm dependency must provide a " @@ -113,75 +124,89 @@ def cmake_args(self): "headers, found: {0}".format(llvm_clang_include_dirs) ) args.append( - "-DCLANG_INCLUDE_PATH:String={0}".format(path.dirname(llvm_clang_include_dirs[0])) + "-DCLANG_INCLUDE_PATH:String={0}".format(os.path.dirname(llvm_clang_include_dirs[0])) ) - # Find the right LLVM compiler - llvm_clang_bin = path.join(spec["llvm"].prefix.bin, "clang") - llvm_clang_bin_cpp = path.join(spec["llvm"].prefix.bin, "clang++") - if not filesystem.is_exe(llvm_clang_bin): - llvm_clang_bin = path.join(spec["llvm"].prefix.bin, "amdclang") - llvm_clang_bin_cpp = path.join(spec["llvm"].prefix.bin, "amdclang++") - if not filesystem.is_exe(llvm_clang_bin): - raise InstallError( - "concretized LLVM dependency must provide a " - "valid clang/amdclang executable, found invalid: " - "{0}".format(llvm_clang_bin) - ) + # target clang++ executable + llvm_clang_bin = os.path.join(spec["llvm"].prefix.bin, "clang++") + if not is_exe(llvm_clang_bin): + raise InstallError( + "concretized llvm dependency must provide a " + "valid clang++ executable, found invalid: " + "{0}".format(llvm_clang_bin) + ) args.append("-DCLANG_EXECUTABLE_PATH:String={0}".format(llvm_clang_bin)) - args.append("-DCMAKE_C_COMPILER:String={0}".format(llvm_clang_bin)) - args.append("-DCMAKE_CXX_COMPILER:String={0}".format(llvm_clang_bin_cpp)) - # explicit CUDA toolkit - if "+cuda" in spec: + if spec.satisfies("+cuda"): args.append("-DCUDA_TOOLKIT_ROOT_DIR:String={0}".format(spec["cuda"].prefix)) - if "+rocm" in spec: - # FIXME: here spec["rocm"].prefix does not work - # Instead (temporary solution: we use HIP prefix and - # remove the "hip/" part of the path which is the ROCm path - rocm_path = path.split(spec["hip"].prefix[:-1])[0] - args.append("-DROCM_PATH:String={0}".format(rocm_path)) - + if spec.satisfies("+rocm"): + 
args.append("-DWITH_ACCELERATED_CPU:STRING=OFF") + args.append("-DROCM_PATH:STRING={0}".format(os.environ.get("ROCM_PATH"))) + if self.spec.satisfies("@24.02.0:"): + args.append("-DWITH_SSCP_COMPILER=OFF") return args @run_after("install") def filter_config_file(self): - config_file_paths = filesystem.find(self.prefix, "syclcc.json") - if len(config_file_paths) != 1: - raise InstallError( - "installed hipSYCL must provide a unique compiler driver " - "configuration file, found: {0}".format(config_file_paths) - ) - config_file_path = config_file_paths[0] - with open(config_file_path) as f: - config = json.load(f) - # 1. Fix compiler: use the real one in place of the Spack wrapper - config["default-cpu-cxx"] = self.compiler.cxx - # 2. Fix stdlib: we need to make sure cuda-enabled binaries find - # the libc++.so and libc++abi.so dyn linked to the sycl - # ptx backend - if "+cuda" in self.spec: - rpaths = set() - so_paths = filesystem.find_libraries( - "libc++", self.spec["llvm"].prefix, shared=True, recursive=True - ) - if len(so_paths) != 1: + def edit_config(filename, editor): + config_file_paths = find(self.prefix, filename) + if len(config_file_paths) != 1: raise InstallError( - "concretized llvm dependency must provide a " - "unique directory containing libc++.so, " - "found: \"{0}\"".format(so_paths) + "installed hipSYCL must provide a unique compiler driver" + "configuration file ({0}), found: {1}".format(filename, config_file_paths) ) - rpaths.add(path.dirname(so_paths[0])) - so_paths = filesystem.find_libraries( - "libc++abi", self.spec["llvm"].prefix, shared=True, recursive=True - ) - if len(so_paths) != 1: - raise InstallError( - "concretized llvm dependency must provide a " - "unique directory containing libc++abi, " - "found: \"{0}\"".format(so_paths) + config_file_path = config_file_paths[0] + with open(config_file_path) as f: + config = json.load(f) + + config_modified = editor(config) + + with open(config_file_path, "w") as f: + 
json.dump(config_modified, f, indent=2) + + if self.spec.satisfies("@:23.10.0"): + configfiles = {"core": "syclcc.json", "cuda": "syclcc.json"} + else: + configfiles = {"core": "acpp-core.json", "cuda": "acpp-cuda.json"} + + def adjust_core_config(config): + config["default-cpu-cxx"] = self.compiler.cxx + return config + + edit_config(configfiles["core"], adjust_core_config) + + if self.spec.satisfies("+cuda"): + # 1. Fix compiler: use the real one in place of the Spack wrapper + + # 2. Fix stdlib: we need to make sure cuda-enabled binaries find + # the libc++.so and libc++abi.so dyn linked to the sycl + # ptx backend + rpaths = set() + if self.spec.satisfies("~rocm"): + so_paths = find_libraries( + "libc++", self.spec["llvm"].prefix, shared=True, recursive=True + ) + if len(so_paths) != 1: + raise InstallError( + "concretized llvm dependency must provide a " + "unique directory containing libc++.so, " + "found: {0}".format(so_paths) + ) + rpaths.add(os.path.dirname(so_paths[0])) + so_paths = find_libraries( + "libc++abi", self.spec["llvm"].prefix, shared=True, recursive=True ) - rpaths.add(path.dirname(so_paths[0])) - config["default-cuda-link-line"] += " " + " ".join("-rpath {0}".format(p) for p in rpaths) - # Replace the installed config file - with open(config_file_path, "w") as f: - json.dump(config, f, indent=2) + if len(so_paths) != 1: + raise InstallError( + "concretized llvm dependency must provide a " + "unique directory containing libc++abi, " + "found: {0}".format(so_paths) + ) + rpaths.add(os.path.dirname(so_paths[0])) + + def adjust_cuda_config(config): + config["default-cuda-link-line"] += " " + " ".join( + "-rpath {0}".format(p) for p in rpaths + ) + return config + + edit_config(configfiles["cuda"], adjust_cuda_config) diff --git a/systems/all_hardware_descriptions/DELL-sapphirerapids-H100-Infiniband/hardware_description.yaml b/systems/all_hardware_descriptions/DELL-sapphirerapids-H100-Infiniband/hardware_description.yaml new file mode 100644 index 
000000000..9c32be2fd --- /dev/null +++ b/systems/all_hardware_descriptions/DELL-sapphirerapids-H100-Infiniband/hardware_description.yaml @@ -0,0 +1,33 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# Benchpark Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: Apache-2.0 +system_definition: + name: DELL-sapphirerapids-H100-Infiniband + integrator: + vendor: DELLEMC + name: PowerEdge + processor: + vendor: Intel + name: XeonPlatinum8480 + ISA: x86_64 + uArch: sapphirerapids + accelerator: + vendor: NVIDIA + name: H100 + ISA: PTX + uArch: sm_90 + interconnect: + vendor: Mellanox + name: HDR100InfiniBand + systems-tested: + llnl-cluster: + os: TOSS + scheduler: slurm + compiler: gcc + runtime: + mpi: mvapich + top500-system-instances: + Dane: + benchpark_system: llnl-matrix + top500: diff --git a/systems/llnl-cluster/system.py b/systems/llnl-cluster/system.py index c130f17ee..96cd27078 100644 --- a/systems/llnl-cluster/system.py +++ b/systems/llnl-cluster/system.py @@ -115,8 +115,9 @@ def compute_packages_section(self): }, "cmake": { "externals": [ - {"spec": "cmake@3.26.5", "prefix": "/usr"}, - {"spec": "cmake@3.23.1", "prefix": "/usr/tce"}, + {"spec": "cmake@3.26.5", "prefix": "/usr/tce/packages/cmake/cmake-3.26.3"}, + {"spec": "cmake@3.23.1", "prefix": "/usr/tce/packages/cmake/cmake-3.23.1"}, + {"spec": "cmake@3.31.7", "prefix": "/collab/usr/global/tools/cmake/toss_4_x86_64_ib/cmake-3.31.7"}, ], "buildable": False, }, diff --git a/systems/llnl-elcapitan/system.py b/systems/llnl-elcapitan/system.py index c0e1a603a..0e211677b 100644 --- a/systems/llnl-elcapitan/system.py +++ b/systems/llnl-elcapitan/system.py @@ -159,9 +159,18 @@ def compute_packages_section(self): "groff": {"externals": [{"spec": "groff@1.22.3", "prefix": "/usr"}]}, "cmake": { "externals": [ - {"spec": "cmake@3.20.2", "prefix": "/usr"}, - {"spec": "cmake@3.23.1", "prefix": "/usr/tce"}, - {"spec": "cmake@3.24.2", "prefix": 
"/usr/tce"}, + { + "spec": "cmake@3.23.1", + "prefix": "/usr/tce/packages/cmake/cmake-3.23.1", + }, + { + "spec": "cmake@3.24.2", + "prefix": "/usr/tce/packages/cmake/cmake-3.24.2", + }, + { + "spec": "cmake@3.29.2", + "prefix": "/usr/tce/packages/cmake/cmake-3.29.2", + }, ], "buildable": False, }, @@ -404,7 +413,7 @@ def mpi_config(self): "cray-mpich": { "externals": [ { - "spec": f"cray-mpich@{self.mpi_version}%cce@{self.cce_version} {gtl_spec} +wrappers", + "spec": f"cray-mpich@{self.mpi_version}%rocmcc@{self.rocm_version} {gtl_spec} +wrappers", "prefix": f"/opt/cray/pe/mpich/{self.mpi_version}/ofi/crayclang/{self.short_cce_version}", "extra_attributes": gtl_cfg, } diff --git a/systems/llnl-matrix/system.py b/systems/llnl-matrix/system.py new file mode 100644 index 000000000..95f96c6c0 --- /dev/null +++ b/systems/llnl-matrix/system.py @@ -0,0 +1,400 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC and other +# Benchpark Project Developers. See the top-level COPYRIGHT file for details. 
# Copyright 2023 Lawrence Livermore National Security, LLC and other
# Benchpark Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: Apache-2.0


from benchpark.directives import variant, maintainers
from benchpark.cudasystem import CudaSystem
from benchpark.paths import hardware_descriptions
from benchpark.system import System
from packaging.version import Version


class LlnlMatrix(System):
    """System definition for LLNL's Matrix cluster.

    Dell Sapphire Rapids nodes (112 cores/node) with 4 NVIDIA H100 GPUs
    per node, InfiniBand interconnect, scheduled with Slurm.  MPI is a
    system-installed mvapich2 build matched to the selected compiler
    toolchain, and CUDA math libraries come from the system CUDA install.
    """

    maintainers("pearce8", "michaelmckinsey1")

    # Static per-cluster hardware facts; copied onto the instance as
    # attributes by __init__.
    id_to_resources = {
        "matrix": {
            "cuda_arch": 90,
            "sys_cores_per_node": 112,
            "sys_gpus_per_node": 4,
            "system_site": "llnl",
            "hardware_key": str(hardware_descriptions)
            + "/DELL-sapphirerapids-H100-Infiniband/hardware_description.yaml",
        },
    }

    variant(
        "cuda",
        default="12.2.2",
        values=("12.2.2", "11.8.0"),
        description="CUDA version",
    )

    variant(
        "compiler",
        default="oneapi",
        values=("oneapi", "gcc", "intel"),
        description="Which compiler to use",
    )

    # Map each "compiler" variant value to the system mvapich2 install
    # built with that toolchain.  The MPI ldflags are derived from the
    # prefix, so each entry only needs (spack spec, install prefix).
    _mvapich2_by_compiler = {
        "gcc": (
            "mvapich2@2.3.7-gcc1211",
            "/usr/tce/packages/mvapich2/mvapich2-2.3.7-gcc-12.1.1",
        ),
        "intel": (
            "mvapich2@2.3.7-intel202160classic",
            "/usr/tce/packages/mvapich2/mvapich2-2.3.7-intel-classic-2021.6.0",
        ),
        "oneapi": (
            "mvapich2@2.3.7-intel202321",
            "/usr/tce/packages/mvapich2/mvapich2-2.3.7-intel-2023.2.1",
        ),
    }

    def __init__(self, spec):
        """Initialize Matrix resources from the concretized variant spec.

        Sets the CUDA programming model, parses the requested CUDA
        version, and copies the static hardware attributes onto ``self``.
        """
        super().__init__(spec)
        self.programming_models = [CudaSystem()]
        self.cuda_version = Version(self.spec.variants["cuda"][0])
        # Matrix uses mvapich2, not cray-mpich, so no GTL linkage is needed.
        self.gtl_flag = False

        self.scheduler = "slurm"
        attrs = self.id_to_resources.get("matrix")
        for k, v in attrs.items():
            setattr(self, k, v)

    def compute_packages_section(self):
        """Return the Spack ``packages:`` configuration for Matrix.

        All system-provided tools are declared as non-buildable externals.
        The MPI entry is selected from the ``compiler`` variant, compiler
        preferences are merged from :meth:`compiler_weighting_cfg`, and
        CUDA math libraries from :meth:`cuda_config`.
        """
        selections = {
            "packages": {
                "elfutils": {
                    "externals": [{"spec": "elfutils@0.190", "prefix": "/usr"}],
                    "buildable": False,
                },
                "papi": {
                    "buildable": False,
                    "externals": [
                        {
                            "spec": "papi@6.0.0.1",
                            "prefix": "/usr/tce/packages/papi/papi-6.0.0.1",
                        }
                    ],
                },
                "unwind": {
                    "externals": [{"spec": "unwind@8.0.1", "prefix": "/usr"}],
                    "buildable": False,
                },
                "fftw": {
                    "buildable": False,
                    "externals": [
                        {
                            "spec": "fftw@3.3.10",
                            "prefix": "/usr/tce/packages/fftw/fftw-3.3.10",
                        }
                    ],
                },
                "intel-oneapi-mkl": {
                    "externals": [
                        {
                            "spec": "intel-oneapi-mkl@2023.2.0",
                            "prefix": "/opt/intel/oneapi",
                        }
                    ],
                    "buildable": False,
                },
                "blas": {
                    "buildable": False,
                    "externals": [
                        {
                            "spec": "intel-oneapi-mkl@2023.2.0",
                            "prefix": "/opt/intel/oneapi",
                        }
                    ],
                },
                "lapack": {
                    "buildable": False,
                    "externals": [
                        {
                            "spec": "intel-oneapi-mkl@2023.2.0",
                            "prefix": "/opt/intel/oneapi",
                        }
                    ],
                },
                "diffutils": {
                    "externals": [{"spec": "diffutils@3.6", "prefix": "/usr"}],
                    "buildable": False,
                },
                "cmake": {
                    "externals": [
                        # FIX: spec version now matches the install prefix
                        # (the prefix is cmake-3.26.3, not 3.26.5).
                        {
                            "spec": "cmake@3.26.3",
                            "prefix": "/usr/tce/packages/cmake/cmake-3.26.3",
                        },
                        {
                            "spec": "cmake@3.23.1",
                            "prefix": "/usr/tce/packages/cmake/cmake-3.23.1",
                        },
                        {
                            "spec": "cmake@3.31.7",
                            "prefix": "/collab/usr/global/tools/cmake/toss_4_x86_64_ib/cmake-3.31.7",
                        },
                    ],
                    # FIX: externals-only, like every other entry here and
                    # like cmake on the other LLNL system definitions.
                    "buildable": False,
                },
                "tar": {
                    "externals": [{"spec": "tar@1.30", "prefix": "/usr"}],
                    "buildable": False,
                },
                "autoconf": {
                    "externals": [{"spec": "autoconf@2.69", "prefix": "/usr"}],
                    "buildable": False,
                },
                "python": {
                    "externals": [
                        {
                            "spec": "python@3.9.12+bz2+crypt+ctypes+dbm+lzma+pyexpat~pythoncmd+readline+sqlite3+ssl+tix+tkinter+uuid+zlib",
                            "prefix": "/usr/tce",
                        },
                    ],
                    "buildable": False,
                },
                "hwloc": {
                    "externals": [{"spec": "hwloc@2.11.2", "prefix": "/usr"}],
                    "buildable": False,
                },
                "gmake": {
                    "externals": [{"spec": "gmake@4.2.1", "prefix": "/usr"}],
                    "buildable": False,
                },
                "curl": {
                    "externals": [{"spec": "curl@7.61.1", "prefix": "/usr"}],
                    "buildable": False,
                },
            }
        }

        # Pick the mvapich2 install built with the selected compiler; the
        # variant restricts the value to the table's keys.
        compiler = self.spec.variants["compiler"][0]
        mpi_spec, mpi_prefix = self._mvapich2_by_compiler[compiler]
        selections["packages"]["mpi"] = {
            "buildable": False,
            "externals": [
                {
                    "spec": mpi_spec,
                    "prefix": mpi_prefix,
                    "extra_attributes": {"ldflags": f"-L{mpi_prefix}/lib -lmpi"},
                }
            ],
        }

        selections["packages"] |= self.compiler_weighting_cfg()["packages"]

        selections["packages"] |= self.cuda_config(self.spec.variants["cuda"][0])[
            "packages"
        ]

        return selections

    def compiler_weighting_cfg(self):
        """Return compiler-preference requirements for the Spack solver.

        When the oneapi toolchain is selected, prefer %oneapi but allow
        %gcc as a fallback; otherwise impose no preference.
        """
        compiler = self.spec.variants["compiler"][0]

        if compiler == "oneapi":
            return {"packages": {"all": {"require": [{"one_of": ["%oneapi", "%gcc"]}]}}}
        return {"packages": {}}

    @staticmethod
    def _gcc_compiler_entry():
        """Spack compilers.yaml entry for the system gcc 12.1.1 toolchain.

        Shared between compiler=gcc and compiler=oneapi (the oneapi
        configuration also registers gcc as a fallback compiler).
        """
        return {
            "compiler": {
                "spec": "gcc@12.1.1",
                "paths": {
                    "cc": "/usr/tce/packages/gcc/gcc-12.1.1/bin/gcc",
                    "cxx": "/usr/tce/packages/gcc/gcc-12.1.1/bin/g++",
                    "f77": "/usr/tce/packages/gcc/gcc-12.1.1/bin/gfortran",
                    "fc": "/usr/tce/packages/gcc/gcc-12.1.1/bin/gfortran",
                },
                "flags": {},
                "operating_system": "rhel8",
                "target": "x86_64",
                "modules": [],
                "environment": {},
                "extra_rpaths": [],
            }
        }

    def compute_compilers_section(self):
        """Return the Spack ``compilers:`` configuration for the selected
        compiler variant (gcc, classic intel, or oneapi + gcc fallback)."""
        selections = {}
        if self.spec.satisfies("compiler=gcc"):
            selections = {"compilers": [self._gcc_compiler_entry()]}
        elif self.spec.satisfies("compiler=intel"):
            selections = {
                "compilers": [
                    {
                        "compiler": {
                            "spec": "intel@2021.6.0-classic",
                            "paths": {
                                "cc": "/usr/tce/packages/intel-classic/intel-classic-2021.6.0/bin/icc",
                                "cxx": "/usr/tce/packages/intel-classic/intel-classic-2021.6.0/bin/icpc",
                                "f77": "/usr/tce/packages/intel-classic/intel-classic-2021.6.0/bin/ifort",
                                "fc": "/usr/tce/packages/intel-classic/intel-classic-2021.6.0/bin/ifort",
                            },
                            "flags": {},
                            "operating_system": "rhel8",
                            "target": "x86_64",
                            "modules": [],
                            "environment": {},
                            "extra_rpaths": [],
                        }
                    }
                ]
            }
        elif self.spec.satisfies("compiler=oneapi"):
            selections = {
                "compilers": [
                    self._gcc_compiler_entry(),
                    {
                        "compiler": {
                            "spec": "oneapi@2023.2.1",
                            "paths": {
                                "cc": "/usr/tce/packages/intel/intel-2023.2.1/compiler/2023.2.1/linux/bin/icx",
                                "cxx": "/usr/tce/packages/intel/intel-2023.2.1/compiler/2023.2.1/linux/bin/icpx",
                                "f77": "/usr/tce/packages/intel/intel-2023.2.1/compiler/2023.2.1/linux/bin/ifx",
                                "fc": "/usr/tce/packages/intel/intel-2023.2.1/compiler/2023.2.1/linux/bin/ifx",
                            },
                            "flags": {},
                            "operating_system": "rhel8",
                            "target": "x86_64",
                            # icx builds CUDA code against the matching
                            # system CUDA module.
                            "modules": [
                                f"cuda/{self.cuda_version}",
                            ],
                            "environment": {},
                            "extra_rpaths": [],
                        }
                    },
                ]
            }

        return selections

    def cuda_config(self, cuda_version):
        """Return CUDA-related Spack package externals for *cuda_version*.

        BLAS/LAPACK are pinned to MKL, and every CUDA math library
        (curand, cub, cusparse, cublas, cusolver, cufft) plus the CUDA
        toolkit itself points at the same system install prefix.
        """
        cuda_prefix = f"/usr/tce/packages/cuda/cuda-{cuda_version}"

        def external(spec):
            # One non-buildable external rooted at the CUDA install.
            return {
                "externals": [{"spec": spec, "prefix": cuda_prefix}],
                "buildable": False,
            }

        cfg = {
            "packages": {
                "blas": {"require": "intel-oneapi-mkl"},
                "lapack": {"require": "intel-oneapi-mkl"},
                # +allow-unsupported-compilers: icx/gcc versions on TOSS may
                # be newer than the toolkit officially supports.
                "cuda": external(f"cuda@{cuda_version}+allow-unsupported-compilers"),
            }
        }
        for lib in ("curand", "cub", "cusparse", "cublas", "cusolver", "cufft"):
            cfg["packages"][lib] = external(f"{lib}@{cuda_version}")
        return cfg

    def compute_software_section(self):
        """Return the default software mapping (compiler, MPI, BLAS/LAPACK)
        used by experiments on this system."""
        return {
            "software": {
                "packages": {
                    # The variant value doubles as the compiler package name.
                    "default-compiler": {"pkg_spec": self.spec.variants["compiler"][0]},
                    "default-mpi": {"pkg_spec": "mvapich2"},
                    "compiler-gcc": {"pkg_spec": "gcc"},
                    "compiler-intel": {"pkg_spec": "intel"},
                    "blas": {"pkg_spec": "intel-oneapi-mkl"},
                    "lapack": {"pkg_spec": "intel-oneapi-mkl"},
                    "mpi-gcc": {"pkg_spec": "mvapich2"},
                    "mpi-intel": {"pkg_spec": "mvapich2"},
                }
            }
        }
"/usr/tce/packages/cuda/cuda-12.2.2", + } + ], + "buildable": False, + }, + "cusparse": { + "externals": [ + { + "spec": "cusparse@12.2.2", + "prefix": "/usr/tce/packages/cuda/cuda-12.2.2", + } + ], + "buildable": False, + }, + "cuda": { + "externals": [ + { + "spec": "cuda@12.2.2+allow-unsupported-compilers", + "prefix": "/usr/tce/packages/cuda/cuda-12.2.2", + } + ], + "buildable": False, + }, + "cub": { + "externals": [ + { + "spec": "cub@12.2.2", + "prefix": "/usr/tce/packages/cuda/cuda-12.2.2", + } + ], + "buildable": False, + }, + "cublas": { + "externals": [ + { + "spec": "cublas@12.2.2", + "prefix": "/usr/tce/packages/cuda/cuda-12.2.2", + } + ], + "buildable": False, + }, + "cusolver": { + "externals": [ + { + "spec": "cusolver@12.2.2", + "prefix": "/usr/tce/packages/cuda/cuda-12.2.2", + } + ], + "buildable": False, + }, + } if self.spec.satisfies("lapack=cusolver"): if self.spec.satisfies("cuda=10.1.243"): @@ -281,6 +338,18 @@ def compute_packages_section(self): "buildable": False, } } + elif self.spec.satisfies("cuda=12.2.2"): + selections["packages"] |= { + "cusolver": { + "externals": [ + { + "spec": "cusolver@12.2.2", + "prefix": "/usr/tce/packages/cuda/cuda-12.2.2", + } + ], + "buildable": False, + } + } elif self.spec.satisfies("lapack=essl"): selections["packages"] |= { "essl": { @@ -319,6 +388,18 @@ def compute_packages_section(self): "buildable": False, } } + elif self.spec.satisfies("cuda=12.2.2"): + selections["packages"] |= { + "cublas": { + "externals": [ + { + "spec": "cublas@12.2.2", + "prefix": "/usr/tce/packages/cuda/cuda-12.2.2", + } + ], + "buildable": False, + } + } elif self.spec.satisfies("blas=essl"): selections["packages"] |= { "essl": { @@ -394,6 +475,18 @@ def compute_packages_section(self): }, } ], + ( + "gcc", + "12-2-2", + ): [ + { + "spec": "spectrum-mpi@2023.06.28-gcc-11.2.2-cuda-12.2.2", + "prefix": "/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-gcc-11.2.1", + "extra_attributes": { + "ldflags": 
"-lmpiprofilesupport -lmpi_ibm_usempi -lmpi_ibm_mpifh -lmpi_ibm", + }, + } + ], } compiler = self.spec.variants["compiler"][0] @@ -551,6 +644,36 @@ def compute_compilers_section(self): } } ], + ( + "gcc", + "12-2-2", + ): [ + { + "compiler": { + "spec": "gcc@11.2.1-cuda12.2.2", + "paths": { + "cc": "/usr/tce/packages/gcc/gcc-11.2.1/bin/gcc", + "cxx": "/usr/tce/packages/gcc/gcc-11.2.1/bin/g++", + "f77": "/usr/tce/packages/gcc/gcc-11.2.1/bin/gfortran", + "fc": "/usr/tce/packages/gcc/gcc-11.2.1/bin/gfortran", + }, + "flags": { + "cflags": "-g -O2", + "cxxflags": "-g -O2 -std=c++17", + "fflags": "", + }, + "operating_system": "rhel7", + "target": "ppc64le", + "modules": ["cuda/12.2.2", "gcc/11.2.1"], + "environment": { + "set": { + "CUDA_DIR": "/usr/tce/packages/cuda/cuda-12.2.2", + }, + }, + "extra_rpaths": [], + } + } + ], } compiler = self.spec.variants["compiler"][0]