diff --git a/.github/workflows/prs.yml b/.github/workflows/prs.yml new file mode 100644 index 0000000..6250365 --- /dev/null +++ b/.github/workflows/prs.yml @@ -0,0 +1,52 @@ +name: Generate and serve API data for EESSI +on: + pull_request: + branches: + - main +concurrency: + group: pr-${{ github.event.pull_request.number }} + cancel-in-progress: true +permissions: + contents: read +jobs: + test_data_generation: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: eessi/github-action-eessi@v3 + with: + use_eessi_module: true + eessi_stack_version: "2025.06" + - name: Create a virtualenv to install zensical + run: | + python -m venv /tmp/venv_docs + source /tmp/venv_docs/bin/activate + pip install zensical + - name: Generate API data + run: | + echo "Generating data files..." + module purge + module unuse $MODULEPATH + module use /cvmfs/software.eessi.io/init/modules/ + # Only do 2023.06 for EB 5 since this is just a test + ( module load EESSI/2023.06 && module load EasyBuild/5 && module load EESSI-extend && python scripts/generate_data_files.py --eessi-version=2023.06 ) & + # Merge all these results together + wait + python scripts/merge_data_files.py out.yaml eessi*.yaml + mv out.yaml docs/data/eessi_software_metadata.yaml + # Generate json data files and markdown index/description for them + cd docs/data + python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata.yaml eessi_api_metadata + python ../../scripts/calculate_hashes.py + for json_file in *.json; do + python ../../scripts/generate_schema_md.py $json_file >> index.md + done + - name: Test building the website + run: | + source /tmp/venv_docs/bin/activate + zensical build --clean + - name: Upload EESSI API metadata + uses: actions/upload-artifact@v4 + with: + name: eessi-api-metadata + path: docs/data/eessi_api_metadata_software.json diff --git a/scripts/generate_data_files.py b/scripts/generate_data_files.py index 950eb4d..9107f31 100644 --- a/scripts/generate_data_files.py +++ b/scripts/generate_data_files.py @@ -47,7 +47,14 @@ def suppress_stdout(): sys.stdout = old_stdout -def load_and_list_modules(module_name): +def module_dict_from_module_string(module): + module_name, module_version = module.split("/", 1) + module_dict = {"module_name": module_name, "module_version": module_version, "full_module_name": module} + + return module_dict + + +def load_and_list_modules(full_module_name): """ Run `module load ` and `module list` inside a subshell. Returns the list of loaded modules visible inside that subshell. @@ -56,33 +63,33 @@ def load_and_list_modules(module_name): # Run as one shell script so the same session is used cmd = f""" - module load {module_name} || exit 1 + module load {full_module_name} || exit 1 module --terse list 2>&1 """ result = subprocess.run(["bash", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) if result.returncode != 0: - raise RuntimeError(f"Failed to load module '{module_name}':\n{result.stdout}") + raise RuntimeError(f"Failed to load module '{full_module_name}':\n{result.stdout}") # Parse module list output - modules = [line for line in result.stdout.splitlines() if "/" in line] + modules = [module_dict_from_module_string(line) for line in result.stdout.splitlines() if "/" in line] # Filter out the modules we expect to be loaded - eessi_extend_module_stub = "EESSI-extend/" - eb_module_stub = "EasyBuild/" - if module_name.startswith(eessi_extend_module_stub): + eessi_extend_module_name = "EESSI-extend" + eb_module_name = "EasyBuild" + if full_module_name.startswith(f"{eessi_extend_module_name}/"): # Don't filter anything pass - elif module_name.startswith(eb_module_stub): + elif full_module_name.startswith(f"{eb_module_name}/"): # Filter EESSI-extend - modules = [module for module in modules if not module.startswith(eessi_extend_module_stub)] + modules = [module for module in modules if module["module_name"] != eessi_extend_module_name] else: # Filter EESSI-extend and EasyBuild modules = [ module for module in modules - if not module.startswith(eessi_extend_module_stub) and not module.startswith(eb_module_stub) + if module["module_name"] != eessi_extend_module_name and module["module_name"] != eb_module_name ] return modules @@ -209,7 +216,9 @@ def collect_eb_files(base_path): # Store the toolchain hierarchies supported by the EESSI version eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"] = {} for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version]: - toolchain_family = f"{top_level_toolchain['name']}_{top_level_toolchain['version']}" + # versions are typically 2024a/2024b etc. for top level toolchains + # so let's use that to make sorting easy + toolchain_family = f"{top_level_toolchain['version']}_{top_level_toolchain['name']}" # Get the hierarchy and always add the system toolchain eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"][toolchain_family] = [ {"name": "system", "version": "system"} @@ -241,6 +250,7 @@ def collect_eb_files(base_path): ] shutil.rmtree(easyblocks_dir) + # Store everything we now know about the installation as a dict # Use the path as the key since we know it is unique eessi_software["eessi_version"][eessi_version][file] = parsed_ec["ec"].asdict() eessi_software["eessi_version"][eessi_version][file]["mtime"] = os.path.getmtime(file) @@ -255,11 +265,8 @@ def collect_eb_files(base_path): eessi_software["eessi_version"][eessi_version].pop(file) continue - # Store everything we now know about the installation as a dict # Add important data that is related to the module environment - eessi_software["eessi_version"][eessi_version][file]["full_mod_name"] = parsed_ec["full_mod_name"] - eessi_software["eessi_version"][eessi_version][file]["short_mod_name"] = parsed_ec["short_mod_name"] - eessi_software["eessi_version"][eessi_version][file]["required_modules"] = load_and_list_modules( + eessi_software["eessi_version"][eessi_version][file]["module"] = module_dict_from_module_string( parsed_ec["full_mod_name"] ) # Retain the easyblocks used so we can use a heuristic to figure out the type of extensions (R, Python, Perl) diff --git a/scripts/process_eessi_software_metadata.py b/scripts/process_eessi_software_metadata.py index a4fd049..9778419 100644 --- a/scripts/process_eessi_software_metadata.py +++ b/scripts/process_eessi_software_metadata.py @@ -25,12 +25,12 @@ ] TOOLCHAIN_FAMILIES = [ - "foss_2025b", - "foss_2025a", - "foss_2024a", - "foss_2023b", - "foss_2023a", - "foss_2022b", + "2025b_foss", + "2025a_foss", + "2024a_foss", + "2023b_foss", + "2023a_foss", + "2022b_foss", ] @@ -47,7 +47,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool "toolchain_families_compatibility": [ key for key in toolchain_families.keys() if file_metadata["toolchain"] in toolchain_families[key] ], - "modulename": file_metadata["short_mod_name"], + "module": file_metadata["module"], "required_modules": file_metadata["required_modules"], } @@ -65,7 +65,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool # 2) Construct the modulefile path before_arch, _, _ = original_path.partition(detected_arch) - modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["short_mod_name"] + '.lua' + modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["module"]["full_module_name"] + ".lua" spider_cache = before_arch + detected_arch + "/.lmod/cache/spiderT.lua" # 3) Substitute each architecture and test module file existence in spider cache @@ -93,60 +93,77 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool version_dict["versionsuffix"] = file_metadata["versionsuffix"] # No need for as we separate out the different types # version_dict['type'] = "application" - software[file_metadata["name"]]["versions"].append(version_dict) - # - Now extensions + # - Now extensions, we keep them both separately for each type and + # as dicts with extension types in the specific installation + version_dict["extensions"] = [] python_extensions = {} perl_extensions = {} r_extensions = {} octave_extensions = {} ruby_extensions = {} for ext in file_metadata["exts_list"]: - version_dict = copy.deepcopy(base_version_dict) + ext_version_dict = copy.deepcopy(base_version_dict) # (extensions are tuples beginning with name and version) - version_dict["version"] = ext[1] - version_dict["versionsuffix"] = "" + ext_version_dict["version"] = ext[1] + ext_version_dict["versionsuffix"] = "" # Add the parent software name so we can make a set for all versions - version_dict["parent_software"] = { + ext_version_dict["parent_software"] = { "name": file_metadata["name"], "version": file_metadata["version"], "versionsuffix": file_metadata["versionsuffix"], } # First we do a heuristic to figure out the type of extension if "pythonpackage.py" in file_metadata["easyblocks"]: - version_dict["description"] = ( - f"""{ext[0]} is a Python package included in the software module for {version_dict['parent_software']['name']}""" + # First add it to our list of extensions for the parent software + version_dict["extensions"].append({"type": "python", "name": ext[0], "version": ext[1]}) + + # Now create the custom entry + ext_version_dict["description"] = ( + f"""{ext[0]} is a Python package included in the software module for {ext_version_dict['parent_software']['name']}""" ) python_extensions[ext[0]] = {"versions": [], "parent_software": set()} - python_extensions[ext[0]]["versions"].append(version_dict) - python_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"]) + python_extensions[ext[0]]["versions"].append(ext_version_dict) + python_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"]) elif "rpackage.py" in file_metadata["easyblocks"]: - version_dict["description"] = ( - f"""{ext[0]} is an R package included in the software module for {version_dict['parent_software']['name']}""" + # First add it to our list of extensions for the parent software + version_dict["extensions"].append({"type": "r", "name": ext[0], "version": ext[1]}) + + ext_version_dict["description"] = ( + f"""{ext[0]} is an R package included in the software module for {ext_version_dict['parent_software']['name']}""" ) r_extensions[ext[0]] = {"versions": [], "parent_software": set()} - r_extensions[ext[0]]["versions"].append(version_dict) - r_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"]) + r_extensions[ext[0]]["versions"].append(ext_version_dict) + r_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"]) elif "perlmodule.py" in file_metadata["easyblocks"]: - version_dict["description"] = ( - f"""{ext[0]} is a Perl module package included in the software module for {version_dict['parent_software']['name']}""" + # First add it to our list of extensions for the parent software + version_dict["extensions"].append({"type": "perl", "name": ext[0], "version": ext[1]}) + + ext_version_dict["description"] = ( + f"""{ext[0]} is a Perl module package included in the software module for {ext_version_dict['parent_software']['name']}""" ) perl_extensions[ext[0]] = {"versions": [], "parent_software": set()} - perl_extensions[ext[0]]["versions"].append(version_dict) - perl_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"]) + perl_extensions[ext[0]]["versions"].append(ext_version_dict) + perl_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"]) elif "octavepackage.py" in file_metadata["easyblocks"]: - version_dict["description"] = ( - f"""{ext[0]} is an Octave package included in the software module for {version_dict['parent_software']['name']}""" + # First add it to our list of extensions for the parent software + version_dict["extensions"].append({"type": "octave", "name": ext[0], "version": ext[1]}) + + ext_version_dict["description"] = ( + f"""{ext[0]} is an Octave package included in the software module for {ext_version_dict['parent_software']['name']}""" ) octave_extensions[ext[0]] = {"versions": [], "parent_software": set()} - octave_extensions[ext[0]]["versions"].append(version_dict) - octave_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"]) + octave_extensions[ext[0]]["versions"].append(ext_version_dict) + octave_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"]) elif "rubygem.py" in file_metadata["easyblocks"]: - version_dict["description"] = ( - f"""{ext[0]} is an Ruby gem included in the software module for {version_dict['parent_software']['name']}""" + # First add it to our list of extensions for the parent software + version_dict["extensions"].append({"type": "ruby", "name": ext[0], "version": ext[1]}) + + ext_version_dict["description"] = ( + f"""{ext[0]} is an Ruby gem included in the software module for {ext_version_dict['parent_software']['name']}""" ) ruby_extensions[ext[0]] = {"versions": [], "parent_software": set()} - ruby_extensions[ext[0]]["versions"].append(version_dict) - ruby_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"]) + ruby_extensions[ext[0]]["versions"].append(ext_version_dict) + ruby_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"]) else: raise ValueError( f"Only known extension types are R, Python and Perl! Easyblocks used by {original_path} were {file_metadata['easyblocks']}" @@ -155,24 +172,30 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool components = {} if "components" in file_metadata.keys(): for component in file_metadata["components"]: + # First add it to our list of extensions for the parent software + version_dict["extensions"].append({"type": "component", "name": component[0], "version": component[1]}) + # extensions are tuples beginning with name and version if component[0] not in components.keys(): components[component[0]] = {"versions": [], "parent_software": set()} - version_dict = copy.deepcopy(base_version_dict) - version_dict["version"] = component[1] - version_dict["versionsuffix"] = "" - version_dict["type"] = "Component" - version_dict["parent_software"] = { + ext_version_dict = copy.deepcopy(base_version_dict) + ext_version_dict["version"] = component[1] + ext_version_dict["versionsuffix"] = "" + # version_dict["type"] = "Component" + ext_version_dict["parent_software"] = { "name": file_metadata["name"], "version": file_metadata["version"], - "version": file_metadata["versionsuffix"], + "versionsuffix": file_metadata["versionsuffix"], } - version_dict["description"] = ( - f"""{component[0]} is a component included in the software module for {version_dict['parent_software']['name']}""" + ext_version_dict["description"] = ( + f"""{component[0]} is a component included in the software module for {ext_version_dict['parent_software']['name']}""" ) - components[component[0]]["versions"].append(version_dict) - components[component[0]]["parent_software"].add(version_dict["parent_software"]["name"]) - # print(f"Software: {software}, Python: {python_extensions}, Perl: {perl_extensions}, R: {r_extensions}, Component: {components}") + components[component[0]]["versions"].append(ext_version_dict) + components[component[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"]) + + # Now that we've processed all the information let's add the entry + software[file_metadata["name"]]["versions"].append(version_dict) + return software, { "python": python_extensions, "perl": perl_extensions, @@ -228,6 +251,7 @@ def get_all_software(eessi_files_by_eessi_version): for version in all_software_information[software]["versions"]: if toolchain_family in version["toolchain_families_compatibility"]: reference_version = version + break if reference_version is None: raise ValueError(f"No toolchain compatibility in {all_software_information[software]}") for top_level_info in top_level_info_list + ["description"]: @@ -305,7 +329,10 @@ def main(): # - versionsuffix # - cpu_arch (list) # - gpu_arch (list, empty for now) - # - module_file + # - module + # - module_name + # - module_version + # - full_module_name # - required_modules (list of modules) base_json_metadata = {"timestamp": software_metadata["timestamp"]} eessi_versions = software_metadata["eessi_version"].keys()