From 889d77bc81cdecb0503056bf5b18e04c0f2ab593 Mon Sep 17 00:00:00 2001 From: Adrian Hill Date: Fri, 8 May 2026 19:11:10 +0100 Subject: [PATCH 01/15] Add Docker build, Docker CI driver, and GitHub Actions workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce three new automation entry points for OpenIFS, plus a shared helper library: - scripts/bootstrap/docker/ — container-based OpenIFS install: builds the toolchain image (apt + OpenMPI), clones or copies the source, and runs the test suite end-to-end. Replaces the legacy scripts/docker/ - scripts/ci/docker_ci/ — branch-vs-branch SAVED_NORMS bit-compare driver: builds a control branch and a test branch in matching images, runs openifs-test.sh -cbt, and writes a self-contained text report - scripts/shared/ — shared library used by both drivers (shared_helpers, docker_lib, ci_lib, read_yml_config, setup_logging, find_py_packages) - .github/workflows/bit-compare-docker.yml — GitHub Actions workflow that drives the docker_ci bit-compare on push/PR Removes the legacy scripts/docker/ tree (superseded by bootstrap/docker) and updates README.md, .gitignore, and oifs-config.edit_me.sh to point at the new paths. 
--- .github/workflows/bit-compare-docker.yml | 104 +++ .gitignore | 2 +- README.md | 6 +- oifs-config.edit_me.sh | 2 - scripts/{ => bootstrap}/docker/Dockerfile | 12 +- scripts/{ => bootstrap}/docker/README.md | 36 +- .../docker/config/create_openifs_docker.yml | 40 +- .../bootstrap/docker/create-oifs-docker.py | 387 +++++++++++ scripts/ci/docker_ci/Dockerfile.ci | 71 ++ scripts/ci/docker_ci/README.md | 156 +++++ scripts/ci/docker_ci/ci-oifs-docker.py | 575 ++++++++++++++++ .../ci/docker_ci/config/ci_test_docker.yml | 108 +++ .../ci/docker_ci/openifs_branch_bitcompare.py | 142 ++++ scripts/docker/create-oifs-docker.py | 613 ------------------ scripts/shared/README.md | 36 + scripts/shared/ci_lib.py | 170 +++++ scripts/shared/docker_lib.py | 233 +++++++ .../{docker => shared}/find_py_packages.py | 0 scripts/{docker => shared}/read_yml_config.py | 41 +- scripts/{docker => shared}/setup_logging.py | 0 scripts/shared/shared_helpers.py | 323 +++++++++ 21 files changed, 2383 insertions(+), 674 deletions(-) create mode 100644 .github/workflows/bit-compare-docker.yml rename scripts/{ => bootstrap}/docker/Dockerfile (88%) rename scripts/{ => bootstrap}/docker/README.md (83%) rename scripts/{ => bootstrap}/docker/config/create_openifs_docker.yml (55%) create mode 100644 scripts/bootstrap/docker/create-oifs-docker.py create mode 100644 scripts/ci/docker_ci/Dockerfile.ci create mode 100644 scripts/ci/docker_ci/README.md create mode 100644 scripts/ci/docker_ci/ci-oifs-docker.py create mode 100644 scripts/ci/docker_ci/config/ci_test_docker.yml create mode 100644 scripts/ci/docker_ci/openifs_branch_bitcompare.py delete mode 100644 scripts/docker/create-oifs-docker.py create mode 100644 scripts/shared/README.md create mode 100644 scripts/shared/ci_lib.py create mode 100644 scripts/shared/docker_lib.py rename scripts/{docker => shared}/find_py_packages.py (100%) rename scripts/{docker => shared}/read_yml_config.py (51%) rename scripts/{docker => shared}/setup_logging.py (100%) 
create mode 100644 scripts/shared/shared_helpers.py diff --git a/.github/workflows/bit-compare-docker.yml b/.github/workflows/bit-compare-docker.yml new file mode 100644 index 0000000..e508b2a --- /dev/null +++ b/.github/workflows/bit-compare-docker.yml @@ -0,0 +1,104 @@ +name: bit-compare-docker + +on: + pull_request: ~ + workflow_dispatch: ~ + pull_request_target: + types: [labeled] + +concurrency: + group: bit-compare-docker-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + bit-compare: + name: gcc-${{ matrix.gcc }} + if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} + + strategy: + fail-fast: false + matrix: + gcc: ['12', '13', '14'] + + runs-on: ubuntu-22.04 + timeout-minutes: 180 + + env: + CI_DIR: ${{ github.workspace }}/scripts/ci/docker_ci + CONTROL_BRANCH: ${{ github.event.pull_request.base.ref || 'main' }} + OIFS_VERSION: '48r1' + WORK_DIR: ${{ github.workspace }}/_oifs_docker_ci + NORMS_DIR: ${{ github.workspace }}/_oifs_docker_ci/control_saved_norms + REPORTS_DIR: ${{ github.workspace }}/_oifs_docker_ci/ci_reports + + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.sha || github.sha }} + + - name: Resolve control SHA + id: control + run: | + SHA=$(git ls-remote https://github.com/Adehill/openifs.git \ + "refs/heads/${CONTROL_BRANCH}" | cut -f1) + if [ -z "$SHA" ]; then + echo "Could not resolve refs/heads/${CONTROL_BRANCH}" >&2 + exit 1 + fi + echo "sha=${SHA}" >> "$GITHUB_OUTPUT" + + - name: Restore control NORMS cache + uses: actions/cache@v4 + with: + path: ${{ env.NORMS_DIR }} + key: control-norms-gcc${{ matrix.gcc }}-${{ env.OIFS_VERSION }}-${{ steps.control.outputs.sha }} + + - name: Set up Python venv + run: | + set -eu + python3 -m venv "${WORK_DIR}/venv" + source "${WORK_DIR}/venv/bin/activate" + python3 -m pip install --upgrade pip + python3 -m pip install gitpython pyyaml + + 
- name: Render CI config + run: | + set -eu + mkdir -p "${WORK_DIR}" + cat > "${WORK_DIR}/ci_test_docker.yml" < clone that branch from openifs_repo_url (default) +# a directory path -> copy that local checkout into the Docker build directory +# empty / not set -> auto-detect the checkout containing this script +openifs_source: "main" -# Repository URL (requires SSH access) -openifs_repo_url: "git@github.com:ecmwf-ifs/openifs.git" +# Repository URL +openifs_repo_url: "https://github.com/ecmwf-ifs/openifs.git" # SCM experiment data URL (tar.gz or tar file) scm_url: https://openifs.ecmwf.int/data/scm/48r1/scm_openifs_48r1.tar.gz -# Clone repository (True) or use existing directory (False) -clone_openifs: True - -# Force removal of existing clone without prompting +# Force removal of existing source directory before re-staging force_reclone: False # Run openifs build command after building image @@ -159,11 +159,15 @@ This is a clean container in which `source oifs-config.edit_me.sh` is run upon s - Validates base Docker image is from official sources (security) - Checks if base image exists locally, pulls if needed -### Step 2: Repository Setup +### Step 2: Source Setup + +Resolves `openifs_source` using the same three-mode convention as the CI driver: + +- **Branch name** (default, e.g. `"main"`) — shallow-clones from `openifs_repo_url` into the build directory +- **Directory path** (e.g. `"~/src/openifs"`) — copies that local checkout into the build directory, skipping transient artefacts (`.git`, `build/`, `__pycache__`, etc.) +- **Empty** — auto-detects the OpenIFS checkout that contains this script (useful when running from inside the repository) -- Shallow clones OpenIFS from specified branch (if `clone_openifs: True`) -- Copies SCM experiment data to build directory -- Updates configuration files with correct paths +`force_reclone: True` removes and re-stages the source regardless of mode. 
### Step 3: Docker Build @@ -185,10 +189,10 @@ This is a clean container in which `source oifs-config.edit_me.sh` is run upon s - Set `force_rebuild: True` to rebuild - Or manually remove the image -### Clone Directory Exists +### Source Directory Exists -- Set `force_reclone: True` to remove and re-clone -- Or set `clone_openifs: False` to use existing directory +- Set `force_reclone: True` to remove and re-stage the source +- Or change `openifs_source` to a different branch or path ### Base Image Not Found diff --git a/scripts/docker/config/create_openifs_docker.yml b/scripts/bootstrap/docker/config/create_openifs_docker.yml similarity index 55% rename from scripts/docker/config/create_openifs_docker.yml rename to scripts/bootstrap/docker/config/create_openifs_docker.yml index 0c0be82..1ddec9e 100644 --- a/scripts/docker/config/create_openifs_docker.yml +++ b/scripts/bootstrap/docker/config/create_openifs_docker.yml @@ -9,27 +9,38 @@ # nor does it submit to any jurisdiction. # -# Openifs version +# OpenIFS version (used for directory naming, image tagging, and as a +# component of every data-archive URL below). openifs_version : "48r1" -# The OpenIFS branch to extract from repo -openifs_branch : "main" - -# Climate data version +# Climate data version (embedded in the climate tarball filename). climate_version : "climate.v020" -# Base URL for OpenIFS data files +# OpenIFS repository (must be accessible from the machine running this script). +openifs_repo_url : "https://github.com/Adehill/openifs.git" + +# Base URL for OpenIFS data files. Per-version ifsdata, rtables and +# climate tarballs are derived from this in docker_lib.modify_dockerfile(). openifs_data_base_url : "https://sites.ecmwf.int/openifs/openifs-download/ifsdata" -# SCM experiment data URL (tar.gz or tar file) +# SCM experiment data archive. scm_url : "https://openifs.ecmwf.int/data/scm/48r1/scm_openifs_48r1.tar.gz" -# OpenIFS experiment package (low res) +# Low-resolution 3-D experiment package. 
openifs_expt_url : "https://sites.ecmwf.int/openifs/openifs-download/experiments/48r1/2016-09-25_Karl/ab7z.tar.gz" +# The OpenIFS branch to build inside the Docker image. +# openifs_source controls where the OpenIFS tree comes from. +# Three modes — same convention as the CI test_branch key: +# empty / not set -> auto-detect the OpenIFS checkout containing this script +# a directory path -> copy that local checkout into the Docker build directory +# (transient artefacts .git, build/, __pycache__ are skipped) +# a branch name -> clone that branch from openifs_repo_url +openifs_source : "" + # Docker image - the following images are known to work. #base_docker_image : "12.3.0-bullseye" -base_docker_image : "13" +base_docker_image : "14" # Path for the Docker file template docker_template : "./Dockerfile" @@ -37,17 +48,11 @@ docker_template : "./Dockerfile" # Directory to store the OpenIFS Dockerfile and base OpenIFS version and files openifs_build_docker_dir : "~/oifs_docker_create_dir" -# The URL for the OpenIFS repository, where the branch will be extracted -openifs_repo_url : "git@github.com:ecmwf-ifs/openifs.git" - -# True to clone OpenIFS repository, False to use existing -clone_openifs : True - -# Force removal of existing clone directory without prompting +# Force removal of existing clone/staged directory before re-staging force_reclone : True # Force rebuild of Docker image even if it exists -force_rebuild : False +force_rebuild : True # Run tests after building image run_tests : True @@ -60,4 +65,3 @@ remove_test_container : False # Skip URL validation (useful if behind firewall or for offline builds) skip_url_validation : False - diff --git a/scripts/bootstrap/docker/create-oifs-docker.py b/scripts/bootstrap/docker/create-oifs-docker.py new file mode 100644 index 0000000..7c5284b --- /dev/null +++ b/scripts/bootstrap/docker/create-oifs-docker.py @@ -0,0 +1,387 @@ +#! /usr/bin/env python3 +# +# (C) Copyright 2011- ECMWF. 
def parse_arguments():
    """
    Parse the command line of create-oifs-docker.py.

    The only option is --config / -c, the path to the YAML configuration
    file. It is mandatory: when it is missing the help text is printed,
    an error is written to stderr and the process exits with status 1 so
    that calling shells and CI jobs see the failure.

    Returns:
        argparse.Namespace with a single attribute, ``config``.

    Raises:
        SystemExit: status 1 when --config is missing; argparse itself
            exits with status 2 on unknown options.
    """
    parser = argparse.ArgumentParser(
        description="""
create-oifs-docker.py and the associated modules create a
container for the stand-alone package for OpenIFS.

This script automates:
  1. Cloning OpenIFS from the specified branch, or copying a local checkout
  2. Copying SCM experiment data
  3. Building a Docker image with GCC and required libraries
  4. Running OpenIFS tests to verify the installation

For detailed documentation, see README.md

Prerequisites:
  - Docker installed and running
  - Python 3 with git, yaml modules (see README.md for setup)
  - Network access to the OpenIFS repository (openifs_repo_url)

Usage:
  python3 create-oifs-docker.py -c config/create_openifs_docker.yml

For more information: README.md#detailed-configuration

""",
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("--config", "-c", type=str,
                        help="YAML configuration file (see config/create_openifs_docker.yml)")

    args = parser.parse_args()

    # The config file is mandatory. Report the problem on stderr and exit
    # non-zero; a bare sys.exit() would signal success to the caller.
    if args.config is None:
        parser.print_help()
        print("""
[ERROR]: User must provide a yml config file using --config, e.g.
  ./create-oifs-docker.py -c config/create_openifs_docker.yml
        """, file=sys.stderr)
        sys.exit(1)

    return args
def run_openifs_test(openifs_version, image_name,
                     run_tests=True,
                     run_scm_test=True,
                     remove_container=True):
    """
    Build OpenIFS inside a fresh container and optionally run the tests.

    A detached container named ``oifs-<openifs_version>`` is started from
    ``image_name`` (any existing container of that name is removed first).
    ``openifs-test.sh -cb`` is always run; the ctest stage and the SCM
    cases are chained after it when requested.

    Args:
        openifs_version: OpenIFS version string; used for the container
            name and for the path of oifs-config.edit_me.sh in the container
        image_name: Docker image to start the container from
        run_tests: Run the OpenIFS tests (openifs-test.sh -t)
        run_scm_test: Run the standard SCM cases (callscm)
        remove_container: If True, remove the container after a successful
            run (default: True). After a failure the container is always
            kept so that it can be inspected.

    Returns:
        True when every requested stage succeeded, False otherwise.

    Raises:
        subprocess.CalledProcessError: if removing a stale container or
            starting the new container fails.
    """
    logger = logging.getLogger(__name__)

    container_name = f"oifs-{openifs_version}"

    # Remove any existing container with the same name
    check_result = subprocess.run(
        ["docker", "inspect", "--format", "{{.Name}}", container_name],
        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
    if check_result.returncode == 0:
        logger.warning(f"Container '{container_name}' already exists and will be removed")
        subprocess.run(["docker", "rm", "-f", container_name], check=True,
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        logger.info(f"Existing container '{container_name}' removed successfully")

    # Start container with /bin/bash as main process
    logger.info(f"Starting container '{container_name}' from image {image_name}...")
    run_cmd = [
        "docker", "run", "-dit",
        "--name", container_name,
        image_name,
        "/bin/bash"
    ]
    subprocess.run(run_cmd, check=True)
    logger.info(f"Container '{container_name}' started. Re-enter later with:")
    logger.info(f"  docker start {container_name} && docker exec -it {container_name} /bin/bash")

    # Build is mandatory; tests and SCM cases are chained with && so the
    # first failing stage stops the sequence and sets the exit status.
    test_cmd = (
        f"source ~/{openifs_version}/oifs-config.edit_me.sh && "
        f"$OIFS_TEST/openifs-test.sh -cb -j 8"
    )
    if run_tests:
        test_cmd += " && $OIFS_TEST/openifs-test.sh -t"
    if run_scm_test:
        test_cmd += " && cd $OIFS_HOME && $SCM_TEST/callscm"

    # 'docker exec -t' is rejected when stdin is not a terminal (CI jobs,
    # cron, pipes), so only request an interactive TTY when we have one.
    tty_flags = ["-it"] if sys.stdin.isatty() else []
    exec_cmd = ["docker", "exec", *tty_flags, container_name, "bash", "-lc", test_cmd]

    logger.info(f"Running tests via exec: {' '.join(exec_cmd)}\n")

    try:
        subprocess.run(exec_cmd, check=True)
    except subprocess.CalledProcessError as e:
        # Output is streamed straight to the console/log, not captured, so
        # the exception carries no stdout/stderr worth logging.
        logger.error(f"OpenIFS tests failed: {e}")
        logger.info(f"Container '{container_name}' was kept for inspection. "
                    "Use 'docker ps -a' to see it.")
        return False

    logger.info("OpenIFS built successfully")
    if run_tests:
        logger.info("OpenIFS tests passed successfully")
    if run_scm_test:
        logger.info("SCM test also passed successfully")

    if remove_container:
        # Cleanup happens outside the try block above: a failure to remove
        # the container must not be reported as a test failure.
        cleanup = subprocess.run(["docker", "rm", "-f", container_name])
        if cleanup.returncode == 0:
            logger.info(f"Container '{container_name}' removed")
        else:
            logger.warning(f"Container '{container_name}' could not be removed; "
                           f"remove it manually with 'docker rm -f {container_name}'")
    else:
        logger.info(f"Container '{container_name}' left running. Use 'docker ps' to see it.")
        logger.info(f"Enter the container using 'docker exec -it {container_name} /bin/bash'")
    return True
def _stage_status(enabled, succeeded):
    """Return the summary label ('Passed', 'Failed' or 'Skipped') for a stage."""
    if not enabled:
        return "Skipped"
    return "Passed" if succeeded else "Failed"


def main():
    """
    Drive the container-based OpenIFS install end-to-end.

    Steps, each timed and reported in the final summary:
      1. validate (and if needed pull) the gcc base image
      2. write the Dockerfile from the template into the build directory
      3. stage the OpenIFS source (clone a branch or copy a local checkout)
      4. build the Docker image
      5. optionally build OpenIFS and run the tests inside a container

    Exits with status 1 when the base image is rejected or cannot be
    pulled, the Dockerfile template or local source is missing, or the
    in-container build/tests fail.
    """
    script_start_time = time.time()
    timings = {}

    # Read yaml config path from the command line
    cli_args = parse_arguments()

    # As the command line arguments have been accepted, now
    # check that the "non-standard" python modules are available
    pymod_list = ["git", "yaml"]
    find_py_packages.main(pymod_list)

    config = read_yml_config.main(cli_args.config)

    # Resolve openifs_source once and reuse the result everywhere.
    # The raw value comes from openifs_source (preferred) or the legacy
    # openifs_branch key. resolve_openifs_source maps "" -> auto-detect.
    _raw = config.get('openifs_source') or config.get('openifs_branch', '')
    _source_kind, _source_value = resolve_openifs_source(_raw, __file__)
    if _source_kind == 'remote':
        _tag = slug(_source_value)
    else:
        _tag = f"local-{slug(os.path.basename(os.path.realpath(_source_value)))}"
    config.setdefault('openifs_branch', _tag)

    log_dir = os.path.join(config['openifs_build_docker_dir'], "docker_bld_logfiles")

    # Create directory if it doesn't exist
    os.makedirs(log_dir, exist_ok=True)

    log_file_path = os.path.join(
        log_dir,
        f"log_{config['openifs_version']}_{config['base_docker_image']}.log",
    )

    # Write the logfile below the Docker build directory. Using default log info
    setup_logging.main(log_file_path)
    logger = logging.getLogger(__name__)

    # Docker Base Image Validation
    with timer("Docker Base Image Validation", timings, 'image_validation'):
        base_image = f"gcc:{config['base_docker_image']}"

        # Security check: only allow official/vetted images
        logger.info(f"Validating base Docker image {base_image}...")
        if not is_official_docker_image(base_image):
            logger.error(f"Security check failed: '{base_image}' is not an approved official image")
            logger.error("Only official Docker images are allowed for security reasons")
            logger.error("If you need to use a different image, add it to ALLOWED_OFFICIAL_IMAGES in the code")
            sys.exit(1)

        logger.info(f"Security check passed: {base_image} is an official image")

        # Check if image exists locally
        logger.info(f"Checking if base Docker image {base_image} exists locally...")
        if not check_docker_image_exists(base_image):
            logger.warning(f"Base Docker image {base_image} not found locally")
            logger.info("Attempting to pull from Docker Hub...")

            if not pull_docker_image(base_image):
                logger.error(f"Failed to pull base Docker image {base_image}")
                logger.error("Please check your internet connection and Docker Hub status")
                logger.error(f"You can try manually: docker pull {base_image}")
                sys.exit(1)
        else:
            logger.info(f"Base Docker image {base_image} is available locally")

    # Dockerfile Preparation
    with timer("Dockerfile Preparation", timings, 'dockerfile_prep'):
        docker_file_name = f"Dockerfile_{config['openifs_version']}_{config['base_docker_image']}"
        dockerfile_path = os.path.join(config['openifs_build_docker_dir'], docker_file_name)

        # Check if Dockerfile exists and create backup
        if os.path.exists(dockerfile_path):
            logger.warning(f"Dockerfile {dockerfile_path} already exists, creating backup")
            shutil.copyfile(dockerfile_path, f"{dockerfile_path}.bak")
        else:
            logger.info(f"Creating Dockerfile {dockerfile_path}")

        # Check if template exists
        docker_template = config['docker_template']
        if not os.path.exists(docker_template):
            logger.error(f"Docker template file not found: {docker_template}")
            logger.error("Please check 'docker_template' path in your config file")
            sys.exit(1)

        shutil.copyfile(docker_template, dockerfile_path)
        modify_dockerfile(dockerfile_path, config)

    # OpenIFS Repository Setup
    with timer("OpenIFS Repository Setup", timings, 'repo_setup'):
        openifs_dir = os.path.join(config['openifs_build_docker_dir'], config['openifs_version'])

        # Reuse the resolution computed at the top of main() so we don't
        # accidentally re-interpret the derived openifs_branch tag as a
        # remote branch name (the bug that caused clone of
        # "local-openifs-casim" when openifs_source was empty).
        source_kind, source_value = _source_kind, _source_value

        if source_kind == 'remote':
            logger.info(f"Cloning branch '{source_value}' to {openifs_dir}")
            shallow_clone(
                config['openifs_repo_url'],
                openifs_dir,
                branch=source_value,
                force=config.get('force_reclone', False),
            )
            source_tag = slug(source_value)
        else:
            # 'auto' or 'local' — stage the resolved local path into the build dir.
            local_src = source_value
            if not os.path.isdir(local_src):
                logger.error(f"OpenIFS source not found at {local_src}")
                logger.error("Check 'openifs_source' in your config or re-run from inside the checkout")
                sys.exit(1)
            if os.path.abspath(local_src) == os.path.abspath(openifs_dir):
                logger.info(f"Source and build dir are the same ({openifs_dir}); skipping copy")
            elif os.path.exists(openifs_dir) and not config.get('force_reclone', False):
                logger.info(f"Using existing staged source at {openifs_dir} (force_reclone=False)")
            else:
                if os.path.exists(openifs_dir):
                    # force_reclone=True path: rename the existing staged
                    # tree to a timestamped backup so any uncommitted or
                    # unpushed work survives instead of being deleted.
                    move_to_backup(openifs_dir)
                logger.info(f"Copying local source {local_src} -> {openifs_dir}")
                shutil.copytree(
                    local_src, openifs_dir,
                    symlinks=True,
                    ignore=shutil.ignore_patterns(
                        '.git', 'build', '.cache', '.bootstrap',
                        '__pycache__', '*.pyc', '*.pyo', 'openifs-env',
                    ),
                )
            source_tag = f"local-{slug(os.path.basename(os.path.realpath(local_src)))}"

        # Propagate source_tag so the image name reflects where this build
        # came from (mirrors ci-oifs-docker.py).
        config['openifs_branch'] = source_tag

    # Docker Image Build
    oifs_image_name = (
        f"openifs-{config['openifs_version']}-gcc{config['base_docker_image']}"
        f":{config['openifs_branch']}"
    )

    force_rebuild = config.get('force_rebuild', False)

    with timer("Docker Image Build", timings, 'image_build'):
        logger.info(f"Building Docker image {oifs_image_name}...")
        if force_rebuild:
            logger.info("force_rebuild=True: building without cache")
        else:
            logger.info("force_rebuild=False: building with cache")
        build_docker_image(dockerfile_path, oifs_image_name,
                           config['openifs_build_docker_dir'], no_cache=force_rebuild)
        logger.info(f"Docker image {oifs_image_name} built successfully!")

    # OpenIFS Build and Test
    run_build = config.get('run_build', True)
    run_tests = config.get('run_tests', True)
    run_scm_test = config.get('run_scm_test', True)
    test_success = False

    if run_build:
        with timer("OpenIFS Build and Test", timings, 'build_and_test'):
            test_success = run_openifs_test(
                config['openifs_version'],
                oifs_image_name,
                run_tests,
                run_scm_test,
                config.get('remove_test_container', True),
            )

        if test_success:
            logger.info("All tests passed successfully")
        else:
            logger.error("Tests failed - check build configuration")
    else:
        logger.info("Skipping build and tests (run_build: False in config)")
        timings['build_and_test'] = 0

    # Final Summary
    total_time = time.time() - script_start_time

    # Tests and SCM cases only run as part of the build stage, so they are
    # "Skipped" (not "Failed") whenever the build stage itself is skipped.
    build_status = _stage_status(run_build, test_success)
    tests_status = _stage_status(run_build and run_tests, test_success)
    scm_status = _stage_status(run_build and run_scm_test, test_success)

    logger.info("=" * 70)
    logger.info("FINAL SUMMARY")
    logger.info("=" * 70)
    logger.info("Configuration:")
    logger.info(f" Image: {oifs_image_name}")
    logger.info(f" Cache: {'disabled (--no-cache)' if force_rebuild else 'enabled'}")
    logger.info(f" OpenIFS Build: {build_status}")
    logger.info(f" OpenIFS Tests: {tests_status}")
    logger.info(f" SCM Tests: {scm_status}")
    logger.info("=" * 70)
    logger.info("Timing Summary:")
    logger.info(f" Image Validation: {format_duration(timings['image_validation'])}")
    logger.info(f" Dockerfile Prep: {format_duration(timings['dockerfile_prep'])}")
    logger.info(f" Repository Setup: {format_duration(timings['repo_setup'])}")
    logger.info(f" Image Build: {format_duration(timings['image_build'])}")

    if run_build:
        logger.info(f" Build & Test: {format_duration(timings['build_and_test'])}")
    else:
        logger.info(" Build & Test: Skipped")
    logger.info(" " + "-" * 66)
    logger.info(f" Total: {format_duration(total_time)}")
    logger.info("=" * 70)

    # Propagate a failed build/test run to the caller (shell, CI job);
    # without this the script would exit 0 even though the tests failed.
    if run_build and not test_success:
        sys.exit(1)


if __name__ == "__main__":

    main()
#
# This image is used by scripts/ci/docker_ci/ci-oifs-docker.py to build the
# control and the test branch and run `openifs-test.sh -cbt` for NORMS
# comparison only — no experiment runs, no SCM runs.

FROM docker.io/library/gcc:13.2.0-bookworm

## Force container shell to be bash
SHELL ["/bin/bash", "-l", "-c"]

ARG OPENIFS_DIR=
ARG OPENIFS_REPO_URL=
ARG OPENIFS_BRANCH=

## Install required compilers and libraries, build OpenMPI, and cleanup in one layer.
## apt-get is used throughout: the `apt` front-end has no stable CLI and
## prints a warning when it is driven from a script.
RUN apt-get update && \
    apt-get install -y git cmake python3 python3-ruamel.yaml python3-yaml python3-venv \
        libomp-dev libboost-dev libboost-date-time-dev libboost-filesystem-dev \
        libboost-serialization-dev libboost-program-options-dev netcdf-bin \
        libnetcdf-dev libnetcdff-dev liblapack-dev libeigen3-dev vim emacs \
        wget bc ca-certificates && \
    update-ca-certificates && \
    cd /tmp && \
    wget https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-5.0.10.tar.gz && \
    tar -xvf openmpi-5.0.10.tar.gz && \
    cd openmpi-5.0.10 && \
    ./configure --disable-libxml2 --prefix=/usr/local && \
    make -j4 all && \
    make install && \
    ldconfig && \
    cd / && \
    rm -rf /tmp/openmpi-5.0.10* && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create non-root user (uid 1000) and the OpenIFS source dir.
# No expt directory — CI does not run experiments.
RUN groupadd --gid 1000 openifs && \
    useradd --uid 1000 --gid openifs --shell /bin/bash --create-home openifs && \
    mkdir -p /home/openifs/${OPENIFS_DIR} && \
    chown -R openifs:openifs /home/openifs

USER openifs

# Copy the OpenIFS source tree (cloned by ci-oifs-docker.py into the build context)
WORKDIR /home/openifs/${OPENIFS_DIR}
COPY --chown=openifs:openifs ${OPENIFS_DIR} /home/openifs/${OPENIFS_DIR}

# Patch oifs-config.edit_me.sh so OIFS_HOME points at the copied source dir
# and source it on every shell login (matches the full Dockerfile).
+RUN sed -i 's|export OIFS_HOME="${HOME}/.*"|export OIFS_HOME="${HOME}/'${OPENIFS_DIR}'"|g' oifs-config.edit_me.sh && \ + echo "source /home/openifs/${OPENIFS_DIR}/oifs-config.edit_me.sh" >> /home/openifs/.bashrc diff --git a/scripts/ci/docker_ci/README.md b/scripts/ci/docker_ci/README.md new file mode 100644 index 0000000..11896b6 --- /dev/null +++ b/scripts/ci/docker_ci/README.md @@ -0,0 +1,156 @@ +# OpenIFS Docker CI test — control vs test branch + +Builds two Docker images (one per branch). In each container, runs +`openifs-test.sh -cb` (configure + build), then `openifs-test.sh -t` +(ctest) with `IFS_TEST_BITIDENTICAL=init IFS_TEST_LEGACY=1` so the +framework drops a `SAVED_NORMS` reference file in every `test*/` subdir. +The two stages are run separately so the ctest output can be captured +in isolation and included in the report. + +The bit-comparison runs **inside** the test container — the host never +reads NORMS data. The host receives a self-contained text report +(`<control>-<sha>__<test>.txt`) plus the raw per-branch ctest +output files. Three exit codes: **0** PASS, **1** FAIL, **2** +INCONCLUSIVE (control failed, bit-comparison skipped — see below). + +## Prerequisites + +- Docker installed and running +- Python 3 with `gitpython` and `pyyaml` (same venv as `../../bootstrap/docker/`) +- Git configured with SSH access to the OpenIFS repository + +```bash +cd scripts/ci/docker_ci + +# Re-use the venv from the bootstrap/docker builder +source ../../bootstrap/docker/openifs-env/bin/activate # (or create a new one) +python3 -m pip install gitpython pyyaml +``` + +## Quick start + +1. Edit `config/ci_test_docker.yml` — set `control_branch` and + `test_branch` (and `openifs_repo_url` if not the default). + +2. Run: + + ```bash + python3 ci-oifs-docker.py -c config/ci_test_docker.yml + ``` + +3. Read the report at `<reports_dir>/<control>-<sha>__<test>.txt` (the filename encodes the control branch + its 7-char commit SHA, and either the test branch or the basename of the local source dir when `test_branch` is empty or a path).
The script + exits **0** on PASS, **1** on FAIL, **2** on INCONCLUSIVE. + +## What runs where + +``` +host control container test container +──────────────────────────────────────────────────────────────────────────── +ci-oifs-docker.py + ├── build & test ─────► openifs-test.sh -cb, then -t (output captured) + │ ◄── docker cp ──── ctest output ──► openifs_test_output_control.txt + │ ◄── docker cp ──── SAVED_NORMS tarball + │ (control failure here is tolerated → INCONCLUSIVE; script continues) + │ + ├── build & test ───────────────────────────────► openifs-test.sh -cb, then -t (captured) + │ ◄── docker cp ─────────────────────────────── ctest output ──► openifs_test_output_test.txt + │ + ├── docker cp control tarball + bitcompare.py INTO test container + ├── docker exec ────────────────────────────────► python3 bitcompare --report + │ ◄── docker cp ─────────────────────────────── -__.txt + │ + ├── append both ctest outputs + CI summary to the host-side report + │ + └── exit 0 (PASS) | 1 (FAIL) | 2 (INCONCLUSIVE — control failed) +``` + +## Output and exit codes + +After a run, three artefacts land in ``: + +| File | Contents | +| ---- | -------- | +| `-__.txt` | Self-contained text report (see below) | +| `openifs_test_output_control.txt` | Raw ctest stdout/stderr from the control container | +| `openifs_test_output_test.txt` | Raw ctest stdout/stderr from the test container | + +The `-__…` file is the headline artefact. Its sections, in order: + +1. **Bit-comparison body** — per-test PASS/FAIL lines from + `openifs_branch_bitcompare.py`, ending with `RESULT: PASS|FAIL`. + Replaced by a `BIT-COMPARISON SKIPPED` header + `RESULT: SKIPPED` when + the control phase failed and there were no SAVED_NORMS to compare. +2. **`CTEST OUTPUT — control ()`** — raw ctest output from the + control container (verbatim copy, never edited by the script). +3. **`CTEST OUTPUT — test ()`** — same for the test container. +4. 
**`CI SUMMARY`** — branches with annotation + (`built + tested` / `reused cached NORMS` / `FAILED — bit-comparison skipped`), + bit-comparison verdict (`PASS` / `FAIL` / `SKIPPED`), final result + (`PASS` / `FAIL` / `INCONCLUSIVE`), report path, control NORMS path, + per-phase timings, total wall-clock. + +### Exit codes + +| Code | Meaning | +| ---- | ------- | +| 0 | **PASS** — both branches built and ctest'd, all SAVED_NORMS bit-matched | +| 1 | **FAIL** — both branches built and ctest'd, at least one test's SAVED_NORMS differed | +| 2 | **INCONCLUSIVE** — control phase failed (build or ctest); bit-comparison skipped. Useful when the test branch is the fix for a broken main: the test side still builds and runs, you just can't bit-compare against a missing reference. The test branch's own ctest output is still in the report | + +The **test phase is strict**: a test-branch build/ctest failure is a hard +error and aborts the script (no INCONCLUSIVE fallback for that side). +For test-side debugging, set `remove_test_container: False` and inspect +the container with `docker exec -it oifs-ci-test /bin/bash`. + +## Key config flags + +| Flag | Purpose | +| ---- | ------- | +| `control_branch` | Remote branch name to use as the reference (always cloned, unless `clone_openifs_control: False` and you pre-populate the build dir) | +| `test_branch` | Accepts: empty (auto-resolves to `/../..`), a directory path (used as a local source — `~` is expanded), or a remote branch name (cloned from `openifs_repo_url`). Detected automatically. Local sources are copied into the build dir, skipping `.git`, `build/`, `.cache`, `.bootstrap`, `__pycache__`, `openifs-env` | +| `openifs_repo_url` | Source repo (SSH) | +| `base_docker_image` | GCC tag from docker.io/library/gcc | +| `openifs_test_extra_flags` | Extra configure-time args forwarded to `openifs-test.sh -cb` (e.g. `--without-double-precision --cmake=BUILD_ifsbench=OFF --clean`). 
Not passed to the `-t` ctest call | +| `clone_openifs_control` | When False, do not clone the control branch — expect the source pre-populated at `/build_dir_control//`. Test has no equivalent flag (auto-detected from `test_branch`) | +| `reuse_control_if_present` | Skip the entire control phase if the image-keyed control NORMS tarball (`control_saved_norms__gcc.tgz`) already exists | +| `remove_test_container` | Whether to `docker rm -f` containers on success | +| `force_rebuild`, `force_reclone` | Same semantics as `../../bootstrap/docker/config/create_openifs_docker.yml`. Recommend `force_rebuild: False` for fast iteration: control and test images share their first ~80% of layers (apt install, OpenMPI build, user setup), so the second sequential build reuses the first's cache and only re-runs from `COPY ${OPENIFS_DIR}` onward. Flip to `True` only if you suspect cache corruption | + +## Files + +| File | Role | +| --------------------------------- | ---- | +| `ci-oifs-docker.py` | Driver — orchestrates both branches and the in-container comparison | +| `Dockerfile.ci` | Minimal image — only the toolchain (apt + OpenMPI build) and the OpenIFS source. No SCM data, no experiment data, no ifsdata/rtables/climate (ctest fetches its own inputs from ECPDS via `ifs-test/bin/storage.py`) | +| `openifs_branch_bitcompare.py` | SAVED_NORMS comparator (per-test `filecmp`-based, with `--report`) | +| `config/ci_test_docker.yml` | Configuration | + +Shared helpers (`shared_helpers.py`, `docker_lib.py`, `ci_lib.py`, +`read_yml_config.py`, `setup_logging.py`, `find_py_packages.py`) all live +in `../../shared/` and are imported via a `sys.path` insert at the top of +`ci-oifs-docker.py`. + +## Base image + +The CI is set up around the official `docker.io/library/gcc:` images +(Debian-based, GCC pre-installed). To switch to a different base image +family (e.g.
plain Ubuntu, or a non-Debian distro), edit `Dockerfile.ci`: +update the `FROM` line and adjust the `apt install` packages — for plain +Ubuntu add `gcc- g++- gfortran-`; for Alpine/RHEL/SUSE swap `apt` +for the matching package manager. + +## NORMS convention + +The CI relies on the `ifs-test` framework's environment variables (see +`../../ifs-test/README.md`): + +- `IFS_TEST_BITIDENTICAL=init` → the framework writes `SAVED_NORMS` per test +- `IFS_TEST_LEGACY=1` → use the `ifs-grep-norms.pl` legacy flow + +## Out of scope + +- 3D experiment download/runs (`openifs_expt_url`, `oifs-run`) +- SCM data download/runs (`scm_url`, `callscm`) +- Tolerance-based NORMS comparison (`IFS_TEST_TOLERANCE`) — bit-identical + `filecmp` only +- CI runner integration (GitHub Actions / GitLab) — only the local scripts diff --git a/scripts/ci/docker_ci/ci-oifs-docker.py b/scripts/ci/docker_ci/ci-oifs-docker.py new file mode 100644 index 0000000..0bdd54b --- /dev/null +++ b/scripts/ci/docker_ci/ci-oifs-docker.py @@ -0,0 +1,575 @@ +#! /usr/bin/env python3 +# +# (C) Copyright 2011- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# +""" +Docker-based CI test for OpenIFS — control branch vs test branch. + +Per branch: + 1. Source the branch into a per-label build directory (clone or local copy) + 2. Build a Docker image from Dockerfile.ci (no SCM / experiment data) + 3. Configure + build with ``openifs-test.sh -cb``, then run ctest with + ``-t``. Both calls set IFS_TEST_BITIDENTICAL=init IFS_TEST_LEGACY=1 so + the framework drops a SAVED_NORMS file in every test*/ subdir. 
The + ctest stage's stdout/stderr is tee'd to a file inside the container + and ``docker cp``'d out for inclusion in the host-side report. + +The control branch is run first and its SAVED_NORMS tree is bundled out as +``/control_saved_norms__gcc.tgz`` +(intermediate blob — kept for debugging, never read by host code). The +name is keyed on the OpenIFS version and GCC base image so different +(version, compiler) combinations cache independently. When +``reuse_control_if_present`` is set in the config and the matching tarball +already exists, the entire control phase is skipped. + +Control failure is tolerated. If the control build/ctest fails (e.g. main +doesn't compile and the test branch is the fix), the script: + - logs the failure, cleans up the control container + - continues to the test phase (which is still strict — test failures abort) + - skips the bit-comparison + - writes a synthetic report explaining the situation + - exits with code 2 (INCONCLUSIVE) so a CI gate can distinguish from a + clean PASS (0) or clean FAIL (1) + +Then the test branch is built and run. If the control side succeeded: + 4. ``docker cp`` the control tarball INTO the test container + 5. ``docker cp`` openifs_branch_bitcompare.py INTO the test container + 6. ``docker exec`` the comparator inside the test container with --report — + it untars the control tree, compares each test*/SAVED_NORMS pair, and + writes a self-contained text report + 7. ``docker cp`` ONLY the report out to + ``/-__.txt`` + +The host never reads NORMS data. The captured ctest outputs and a final +CI summary are then appended to the report file so it's fully +self-contained. The script's exit code drives PASS / FAIL / INCONCLUSIVE. 
+ +Usage: + python3 ci-oifs-docker.py -c config/ci_test_docker.yml +""" + +import argparse +import logging +import os +import shutil +import subprocess +import sys +import time + +# Generic helpers (YAML loader, logger setup, module checker, shared_helpers) +# and Docker-specific helpers (docker_lib) all live in scripts/shared/. +_SHARED_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "shared") +if _SHARED_DIR not in sys.path: + sys.path.insert(0, _SHARED_DIR) + +import ci_lib +import docker_lib +import find_py_packages +import read_yml_config +import setup_logging +import shared_helpers + + +# Filename of this script's local comparator — copied INTO the test container. +BITCOMPARE_SCRIPT = "openifs_branch_bitcompare.py" + +# Inside-container paths used by the containerised comparison step. +INCONTAINER_CONTROL_TGZ = "/tmp/control_saved_norms.tgz" +INCONTAINER_CONTROL_DIR = "/tmp/control_saved_norms" +INCONTAINER_BITCOMPARE = f"/tmp/{BITCOMPARE_SCRIPT}" +INCONTAINER_REPORT = "/tmp/norms_report.txt" + +# Inside-container path that captures the ctest stage's stdout/stderr from +# `openifs-test.sh -t`. Copied out per-branch by export_test_output() so the +# host-side report can include it. +INCONTAINER_TEST_OUTPUT = "/tmp/openifs_test_output.txt" + + +def _control_cache_key(config): + """Cache-key suffix for the control SAVED_NORMS tarball (e.g. ``gcc14``). + + Keys the cache on the GCC base image so that different compilers don't + silently share the previous run's NORMS. + """ + return f"gcc{config['base_docker_image']}" + + +def parse_arguments(): + parser = argparse.ArgumentParser( + description=( + "CI test for OpenIFS: build control + test branches in Docker, " + "run openifs-test.sh -cbt in each, and bit-compare SAVED_NORMS " + "INSIDE the test container. Only a small text report is exported " + "to the host." 
def ensure_base_image(config):
    """Check the configured GCC base image and make sure it is available locally.

    The image reference is ``gcc:<config['base_docker_image']>``. The image
    must first pass ``docker_lib.is_official_docker_image``; if
    ``docker_lib.check_docker_image_exists`` then reports it as absent, it is
    fetched with ``docker_lib.pull_docker_image``.

    Terminates the process with exit status 1 when the image is rejected by
    the official-image check or when the pull reports failure.
    (The original docstring notes that this mirrors create-oifs-docker.py.)
    """
    log = logging.getLogger(__name__)
    image_ref = "gcc:{}".format(config['base_docker_image'])

    log.info(f"Validating base Docker image {image_ref}...")
    approved = docker_lib.is_official_docker_image(image_ref)
    if not approved:
        log.error(f"Security check failed: '{image_ref}' is not an approved official image")
        sys.exit(1)

    # Nothing more to do when the image is already present on this host.
    if docker_lib.check_docker_image_exists(image_ref):
        return

    log.info(f"Base image {image_ref} not present locally, pulling...")
    pulled = docker_lib.pull_docker_image(image_ref)
    if not pulled:
        log.error(f"Failed to pull {image_ref}")
        sys.exit(1)
+ """ + logger = logging.getLogger(__name__) + + try: + clone_dir, tag_suffix = shared_helpers.stage_branch_source( + config, label, build_dir, __file__, + ) + except FileNotFoundError as e: + logger.error(str(e)) + sys.exit(1) + + branch_build_dir = os.path.dirname(clone_dir) + branch_dockerfile = os.path.join( + branch_build_dir, + f"Dockerfile_ci_{config['openifs_version']}_{config['base_docker_image']}", + ) + + shutil.copyfile(config['docker_template'], branch_dockerfile) + branch_config = dict( + config, + openifs_branch=tag_suffix, + include_openifs_data_downloads=False, + ) + docker_lib.modify_dockerfile(branch_dockerfile, branch_config) + + image_tag = ( + f"openifs-{config['openifs_version']}-gcc{config['base_docker_image']}" + f":ci-{label}-{tag_suffix}" + ) + logger.info(f"Building image {image_tag} for {label} branch '{branch}'...") + docker_lib.build_docker_image( + branch_dockerfile, + image_tag, + branch_build_dir, + no_cache=config.get('force_rebuild', False), + ) + return image_tag + + +def start_container(label, image_tag, config): + """Start a detached bash container; return its name. + + By default, removes any pre-existing container with the same name first + (e.g. from a prior failed run). This behaviour is controlled by + ``remove_existing_container_before_run`` in the YAML and defaults to True. 
def run_openifs_tests(container, config):
    """Run the configure + build stage, then the ctest stage, inside ``container``.

    Both command strings come from ``ci_lib.build_test_commands``, which is
    given the config, the ``source ~/<openifs_version>/oifs-config.edit_me.sh``
    prefix and ``INCONTAINER_TEST_OUTPUT`` (the in-container path intended to
    capture the ctest stage's output). Each stage is a separate
    ``docker exec <container> bash -lc <command>`` call, so the ctest stage is
    kept apart from the verbose configure/build output.

    Raises ``subprocess.CalledProcessError`` if either stage exits non-zero;
    a failing configure/build stage prevents the ctest stage from running.
    """
    log = logging.getLogger(__name__)

    env_setup = "source ~/{}/oifs-config.edit_me.sh".format(config['openifs_version'])
    build_command, ctest_command = ci_lib.build_test_commands(
        config, env_setup, INCONTAINER_TEST_OUTPUT
    )

    # (log message, shell command) pairs, executed strictly in this order.
    stages = (
        (f"Configure + build in '{container}': {build_command}", build_command),
        (f"Running ctest in '{container}': {ctest_command}", ctest_command),
    )
    for message, shell_command in stages:
        log.info(message)
        subprocess.run(
            ["docker", "exec", container, "bash", "-lc", shell_command],
            check=True,
        )