Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions .github/workflows/prs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Pull-request check: generates the EESSI API data for a single EESSI version,
# builds the documentation site from it, and uploads the resulting JSON
# metadata as a build artifact.
name: Generate and serve API data for EESSI
on:
  pull_request:
    branches:
      - main
# Only one run per pull request at a time; a newer push cancels the older run.
concurrency:
  group: pr-${{ github.event.pull_request.number }}
  cancel-in-progress: true
permissions:
  contents: read
jobs:
  test_data_generation:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: eessi/github-action-eessi@v3
        with:
          use_eessi_module: true
          eessi_stack_version: "2025.06"
      - name: Create a virtualenv to install zensical
        run: |
          python -m venv /tmp/venv_docs
          source /tmp/venv_docs/bin/activate
          pip install zensical
      - name: Generate API data
        run: |
          echo "Generating data files..."
          module purge
          module unuse $MODULEPATH
          module use /cvmfs/software.eessi.io/init/modules/
          # Only do 2023.06 for EB 5 since this is just a test
          ( module load EESSI/2023.06 && module load EasyBuild/5 && module load EESSI-extend && python scripts/generate_data_files.py --eessi-version=2023.06 ) &
          generate_pid=$!
          # Wait on the specific PID: a bare `wait` returns 0 even when the
          # background job failed, which would hide a failed data generation.
          wait "$generate_pid"
          # Merge all these results together
          python scripts/merge_data_files.py out.yaml eessi*.yaml
          mv out.yaml docs/data/eessi_software_metadata.yaml
          # Generate json data files and markdown index/description for them
          cd docs/data
          python ../../scripts/process_eessi_software_metadata.py eessi_software_metadata.yaml eessi_api_metadata
          python ../../scripts/calculate_hashes.py
          for json_file in *.json; do
            python ../../scripts/generate_schema_md.py "$json_file" >> index.md
          done
      - name: Test building the website
        run: |
          source /tmp/venv_docs/bin/activate
          zensical build --clean
      - name: Upload EESSI API metadata
        uses: actions/upload-artifact@v4
        with:
          name: eessi-api-metadata
          path: docs/data/eessi_api_metadata_software.json
37 changes: 22 additions & 15 deletions scripts/generate_data_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,14 @@ def suppress_stdout():
sys.stdout = old_stdout


def load_and_list_modules(module_name):
def module_dict_from_module_string(module: str) -> dict[str, str]:
    """Split a full module name into its name and version parts.

    The string is split on the first "/" only, so any further "/" characters
    stay in the version part.

    Args:
        module: Full module name of the form "<name>/<version>".

    Returns:
        A dict with the keys "module_name", "module_version" and
        "full_module_name" (the unmodified input string).

    Raises:
        ValueError: If ``module`` contains no "/" separator.
    """
    module_name, separator, module_version = module.partition("/")
    if not separator:
        # Fail with a descriptive message instead of an opaque unpacking error
        raise ValueError(f"Expected a module string of the form '<name>/<version>', got {module!r}")

    return {
        "module_name": module_name,
        "module_version": module_version,
        "full_module_name": module,
    }


def load_and_list_modules(full_module_name):
"""
Run `module load <name>` and `module list` inside a subshell.
Returns the list of loaded modules visible inside that subshell.
Expand All @@ -56,33 +63,33 @@ def load_and_list_modules(module_name):

# Run as one shell script so the same session is used
cmd = f"""
module load {module_name} || exit 1
module load {full_module_name} || exit 1
module --terse list 2>&1
"""

result = subprocess.run(["bash", "-c", cmd], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

if result.returncode != 0:
raise RuntimeError(f"Failed to load module '{module_name}':\n{result.stdout}")
raise RuntimeError(f"Failed to load module '{full_module_name}':\n{result.stdout}")

# Parse module list output
modules = [line for line in result.stdout.splitlines() if "/" in line]
modules = [module_dict_from_module_string(line) for line in result.stdout.splitlines() if "/" in line]

# Filter out the modules we expect to be loaded
eessi_extend_module_stub = "EESSI-extend/"
eb_module_stub = "EasyBuild/"
if module_name.startswith(eessi_extend_module_stub):
eessi_extend_module_name = "EESSI-extend"
eb_module_name = "EasyBuild"
if full_module_name.startswith(f"{eessi_extend_module_name}/"):
# Don't filter anything
pass
elif module_name.startswith(eb_module_stub):
elif full_module_name.startswith(f"{eb_module_name}/"):
# Filter EESSI-extend
modules = [module for module in modules if not module.startswith(eessi_extend_module_stub)]
modules = [module for module in modules if module["module_name"] != eessi_extend_module_name]
else:
# Filter EESSI-extend and EasyBuild
modules = [
module
for module in modules
if not module.startswith(eessi_extend_module_stub) and not module.startswith(eb_module_stub)
if module["module_name"] != eessi_extend_module_name and module["module_name"] != eb_module_name
]

return modules
Expand Down Expand Up @@ -209,7 +216,9 @@ def collect_eb_files(base_path):
# Store the toolchain hierarchies supported by the EESSI version
eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"] = {}
for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version]:
toolchain_family = f"{top_level_toolchain['name']}_{top_level_toolchain['version']}"
# versions are typically 2024a/2024b etc. for top level toolchains
# so let's use that to make sorting easy
toolchain_family = f"{top_level_toolchain['version']}_{top_level_toolchain['name']}"
# Get the hierarchy and always add the system toolchain
eessi_software["eessi_version"][eessi_version]["toolchain_hierarchy"][toolchain_family] = [
{"name": "system", "version": "system"}
Expand Down Expand Up @@ -241,6 +250,7 @@ def collect_eb_files(base_path):
]
shutil.rmtree(easyblocks_dir)

# Store everything we now know about the installation as a dict
# Use the path as the key since we know it is unique
eessi_software["eessi_version"][eessi_version][file] = parsed_ec["ec"].asdict()
eessi_software["eessi_version"][eessi_version][file]["mtime"] = os.path.getmtime(file)
Expand All @@ -255,11 +265,8 @@ def collect_eb_files(base_path):
eessi_software["eessi_version"][eessi_version].pop(file)
continue

# Store everything we now know about the installation as a dict
# Add important data that is related to the module environment
eessi_software["eessi_version"][eessi_version][file]["full_mod_name"] = parsed_ec["full_mod_name"]
eessi_software["eessi_version"][eessi_version][file]["short_mod_name"] = parsed_ec["short_mod_name"]
eessi_software["eessi_version"][eessi_version][file]["required_modules"] = load_and_list_modules(
eessi_software["eessi_version"][eessi_version][file]["module"] = module_dict_from_module_string(
parsed_ec["full_mod_name"]
)
# Retain the easyblocks used so we can use a heuristic to figure out the type of extensions (R, Python, Perl)
Expand Down
119 changes: 73 additions & 46 deletions scripts/process_eessi_software_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@
]

TOOLCHAIN_FAMILIES = [
"foss_2025b",
"foss_2025a",
"foss_2024a",
"foss_2023b",
"foss_2023a",
"foss_2022b",
"2025b_foss",
"2025a_foss",
"2024a_foss",
"2023b_foss",
"2023a_foss",
"2022b_foss",
]


Expand All @@ -47,7 +47,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
"toolchain_families_compatibility": [
key for key in toolchain_families.keys() if file_metadata["toolchain"] in toolchain_families[key]
],
"modulename": file_metadata["short_mod_name"],
"module": file_metadata["module"],
"required_modules": file_metadata["required_modules"],
}

Expand All @@ -65,7 +65,7 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool

# 2) Construct the modulefile path
before_arch, _, _ = original_path.partition(detected_arch)
modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["short_mod_name"] + '.lua'
modulefile = before_arch + detected_arch + "/modules/all/" + file_metadata["module"]["full_module_name"] + ".lua"
spider_cache = before_arch + detected_arch + "/.lmod/cache/spiderT.lua"

# 3) Substitute each architecture and test module file existence in spider cache
Expand Down Expand Up @@ -93,60 +93,77 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
version_dict["versionsuffix"] = file_metadata["versionsuffix"]
# No need for as we separate out the different types
# version_dict['type'] = "application"
software[file_metadata["name"]]["versions"].append(version_dict)
# - Now extensions
# - Now extensions, we keep them both separately for each type and
# as dicts with extension types in the specific installation
version_dict["extensions"] = []
python_extensions = {}
perl_extensions = {}
r_extensions = {}
octave_extensions = {}
ruby_extensions = {}
for ext in file_metadata["exts_list"]:
version_dict = copy.deepcopy(base_version_dict)
ext_version_dict = copy.deepcopy(base_version_dict)
# (extensions are tuples beginning with name and version)
version_dict["version"] = ext[1]
version_dict["versionsuffix"] = ""
ext_version_dict["version"] = ext[1]
ext_version_dict["versionsuffix"] = ""
# Add the parent software name so we can make a set for all versions
version_dict["parent_software"] = {
ext_version_dict["parent_software"] = {
"name": file_metadata["name"],
"version": file_metadata["version"],
"versionsuffix": file_metadata["versionsuffix"],
}
# First we do a heuristic to figure out the type of extension
if "pythonpackage.py" in file_metadata["easyblocks"]:
version_dict["description"] = (
f"""{ext[0]} is a Python package included in the software module for {version_dict['parent_software']['name']}"""
# First add it to our list of extensions for the parent software
version_dict["extensions"].append({"type": "python", "name": ext[0], "version": ext[1]})

# Now create the custom entry
ext_version_dict["description"] = (
f"""{ext[0]} is a Python package included in the software module for {ext_version_dict['parent_software']['name']}"""
)
python_extensions[ext[0]] = {"versions": [], "parent_software": set()}
python_extensions[ext[0]]["versions"].append(version_dict)
python_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
python_extensions[ext[0]]["versions"].append(ext_version_dict)
python_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
elif "rpackage.py" in file_metadata["easyblocks"]:
version_dict["description"] = (
f"""{ext[0]} is an R package included in the software module for {version_dict['parent_software']['name']}"""
# First add it to our list of extensions for the parent software
version_dict["extensions"].append({"type": "r", "name": ext[0], "version": ext[1]})

ext_version_dict["description"] = (
f"""{ext[0]} is an R package included in the software module for {ext_version_dict['parent_software']['name']}"""
)
r_extensions[ext[0]] = {"versions": [], "parent_software": set()}
r_extensions[ext[0]]["versions"].append(version_dict)
r_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
r_extensions[ext[0]]["versions"].append(ext_version_dict)
r_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
elif "perlmodule.py" in file_metadata["easyblocks"]:
version_dict["description"] = (
f"""{ext[0]} is a Perl module package included in the software module for {version_dict['parent_software']['name']}"""
# First add it to our list of extensions for the parent software
version_dict["extensions"].append({"type": "perl", "name": ext[0], "version": ext[1]})

ext_version_dict["description"] = (
f"""{ext[0]} is a Perl module package included in the software module for {ext_version_dict['parent_software']['name']}"""
)
perl_extensions[ext[0]] = {"versions": [], "parent_software": set()}
perl_extensions[ext[0]]["versions"].append(version_dict)
perl_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
perl_extensions[ext[0]]["versions"].append(ext_version_dict)
perl_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
elif "octavepackage.py" in file_metadata["easyblocks"]:
version_dict["description"] = (
f"""{ext[0]} is an Octave package included in the software module for {version_dict['parent_software']['name']}"""
# First add it to our list of extensions for the parent software
version_dict["extensions"].append({"type": "octave", "name": ext[0], "version": ext[1]})

ext_version_dict["description"] = (
f"""{ext[0]} is an Octave package included in the software module for {ext_version_dict['parent_software']['name']}"""
)
octave_extensions[ext[0]] = {"versions": [], "parent_software": set()}
octave_extensions[ext[0]]["versions"].append(version_dict)
octave_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
octave_extensions[ext[0]]["versions"].append(ext_version_dict)
octave_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
elif "rubygem.py" in file_metadata["easyblocks"]:
version_dict["description"] = (
f"""{ext[0]} is an Ruby gem included in the software module for {version_dict['parent_software']['name']}"""
# First add it to our list of extensions for the parent software
version_dict["extensions"].append({"type": "ruby", "name": ext[0], "version": ext[1]})

ext_version_dict["description"] = (
f"""{ext[0]} is an Ruby gem included in the software module for {ext_version_dict['parent_software']['name']}"""
)
ruby_extensions[ext[0]] = {"versions": [], "parent_software": set()}
ruby_extensions[ext[0]]["versions"].append(version_dict)
ruby_extensions[ext[0]]["parent_software"].add(version_dict["parent_software"]["name"])
ruby_extensions[ext[0]]["versions"].append(ext_version_dict)
ruby_extensions[ext[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])
else:
raise ValueError(
f"Only known extension types are R, Python and Perl! Easyblocks used by {original_path} were {file_metadata['easyblocks']}"
Expand All @@ -155,24 +172,30 @@ def get_software_information_by_filename(file_metadata, original_path=None, tool
components = {}
if "components" in file_metadata.keys():
for component in file_metadata["components"]:
# First add it to our list of extensions for the parent software
version_dict["extensions"].append({"type": "component", "name": component[0], "version": component[1]})

# extensions are tuples beginning with name and version
if component[0] not in components.keys():
components[component[0]] = {"versions": [], "parent_software": set()}
version_dict = copy.deepcopy(base_version_dict)
version_dict["version"] = component[1]
version_dict["versionsuffix"] = ""
version_dict["type"] = "Component"
version_dict["parent_software"] = {
ext_version_dict = copy.deepcopy(base_version_dict)
ext_version_dict["version"] = component[1]
ext_version_dict["versionsuffix"] = ""
# version_dict["type"] = "Component"
ext_version_dict["parent_software"] = {
"name": file_metadata["name"],
"version": file_metadata["version"],
"version": file_metadata["versionsuffix"],
"versionsuffix": file_metadata["versionsuffix"],
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nasty little bug

}
version_dict["description"] = (
f"""{component[0]} is a component included in the software module for {version_dict['parent_software']['name']}"""
ext_version_dict["description"] = (
f"""{component[0]} is a component included in the software module for {ext_version_dict['parent_software']['name']}"""
)
components[component[0]]["versions"].append(version_dict)
components[component[0]]["parent_software"].add(version_dict["parent_software"]["name"])
# print(f"Software: {software}, Python: {python_extensions}, Perl: {perl_extensions}, R: {r_extensions}, Component: {components}")
components[component[0]]["versions"].append(ext_version_dict)
components[component[0]]["parent_software"].add(ext_version_dict["parent_software"]["name"])

# Now that we've processed all the information let's add the entry
software[file_metadata["name"]]["versions"].append(version_dict)

return software, {
"python": python_extensions,
"perl": perl_extensions,
Expand Down Expand Up @@ -228,6 +251,7 @@ def get_all_software(eessi_files_by_eessi_version):
for version in all_software_information[software]["versions"]:
if toolchain_family in version["toolchain_families_compatibility"]:
reference_version = version
break
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should break here once we have a hit since we want the most recent version.

if reference_version is None:
raise ValueError(f"No toolchain compatibility in {all_software_information[software]}")
for top_level_info in top_level_info_list + ["description"]:
Expand Down Expand Up @@ -305,7 +329,10 @@ def main():
# - versionsuffix
# - cpu_arch (list)
# - gpu_arch (list, empty for now)
# - module_file
# - module
# - module_name
# - module_version
# - full_module_name
# - required_modules (list of modules)
base_json_metadata = {"timestamp": software_metadata["timestamp"]}
eessi_versions = software_metadata["eessi_version"].keys()
Expand Down